2023-03-12 16:00:57 +01:00
|
|
|
// GoToSocial
|
|
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2022-12-16 12:20:22 +01:00
|
|
|
|
|
|
|
package text
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-02-03 11:58:58 +01:00
|
|
|
"fmt"
|
|
|
|
"strings"
|
2022-12-16 12:20:22 +01:00
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/db"
|
2022-12-16 12:20:22 +01:00
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
|
|
|
"github.com/superseriousbusiness/gotosocial/internal/util"
|
|
|
|
"github.com/yuin/goldmark"
|
|
|
|
"github.com/yuin/goldmark/ast"
|
|
|
|
"github.com/yuin/goldmark/parser"
|
|
|
|
"github.com/yuin/goldmark/renderer"
|
|
|
|
"github.com/yuin/goldmark/text"
|
|
|
|
mdutil "github.com/yuin/goldmark/util"
|
|
|
|
)
|
|
|
|
|
|
|
|
// A goldmark extension that parses potential mentions and hashtags separately from regular
|
|
|
|
// text, so that they stay as one contiguous text fragment in the AST, and then renders
|
|
|
|
// them separately too, to avoid scanning normal text for mentions and tags.
|
|
|
|
|
|
|
|
// mention, hashtag and emoji fulfil the goldmark ast.Node interface.
|
|
|
|
type mention struct {
|
|
|
|
ast.BaseInline
|
|
|
|
Segment text.Segment
|
|
|
|
}
|
|
|
|
|
|
|
|
type hashtag struct {
|
|
|
|
ast.BaseInline
|
|
|
|
Segment text.Segment
|
|
|
|
}
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
type emoji struct {
|
|
|
|
ast.BaseInline
|
|
|
|
Segment text.Segment
|
|
|
|
}
|
|
|
|
|
2023-02-17 12:02:29 +01:00
|
|
|
var (
|
|
|
|
kindMention = ast.NewNodeKind("Mention")
|
|
|
|
kindHashtag = ast.NewNodeKind("Hashtag")
|
|
|
|
kindEmoji = ast.NewNodeKind("Emoji")
|
|
|
|
)
|
2022-12-16 12:20:22 +01:00
|
|
|
|
|
|
|
func (n *mention) Kind() ast.NodeKind {
|
|
|
|
return kindMention
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *hashtag) Kind() ast.NodeKind {
|
|
|
|
return kindHashtag
|
|
|
|
}
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
func (n *emoji) Kind() ast.NodeKind {
|
|
|
|
return kindEmoji
|
|
|
|
}
|
|
|
|
|
|
|
|
// Dump can be used for debugging.
|
2022-12-16 12:20:22 +01:00
|
|
|
func (n *mention) Dump(source []byte, level int) {
|
2023-02-03 11:58:58 +01:00
|
|
|
fmt.Printf("%sMention: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (n *hashtag) Dump(source []byte, level int) {
|
2023-02-03 11:58:58 +01:00
|
|
|
fmt.Printf("%sHashtag: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (n *emoji) Dump(source []byte, level int) {
|
|
|
|
fmt.Printf("%sEmoji: %s\n", strings.Repeat(" ", level), string(n.Segment.Value(source)))
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// newMention, newHashtag and newEmoji create a goldmark ast.Node from a goldmark text.Segment.
|
|
|
|
// The contained segment is used in rendering.
|
|
|
|
func newMention(s text.Segment) *mention {
|
|
|
|
return &mention{
|
|
|
|
BaseInline: ast.BaseInline{},
|
|
|
|
Segment: s,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func newHashtag(s text.Segment) *hashtag {
|
|
|
|
return &hashtag{
|
|
|
|
BaseInline: ast.BaseInline{},
|
|
|
|
Segment: s,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
func newEmoji(s text.Segment) *emoji {
|
|
|
|
return &emoji{
|
|
|
|
BaseInline: ast.BaseInline{},
|
|
|
|
Segment: s,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-16 12:20:22 +01:00
|
|
|
// mentionParser, hashtagParser and emojiParser fulfil the goldmark parser.InlineParser interface.
|
2023-02-17 12:02:29 +01:00
|
|
|
// mentionParser is the stateless inline parser whose Trigger is '@' and whose
// Parse produces mention nodes.
type mentionParser struct{}
|
2022-12-16 12:20:22 +01:00
|
|
|
|
2023-02-17 12:02:29 +01:00
|
|
|
// hashtagParser is the stateless inline parser whose Trigger is '#' and whose
// Parse produces hashtag nodes.
type hashtagParser struct{}
|
2022-12-16 12:20:22 +01:00
|
|
|
|
2023-02-17 12:02:29 +01:00
|
|
|
// emojiParser is the stateless inline parser whose Trigger is ':' and whose
// Parse produces emoji nodes.
type emojiParser struct{}
|
2023-02-03 11:58:58 +01:00
|
|
|
|
2022-12-16 12:20:22 +01:00
|
|
|
func (p *mentionParser) Trigger() []byte {
|
|
|
|
return []byte{'@'}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *hashtagParser) Trigger() []byte {
|
|
|
|
return []byte{'#'}
|
|
|
|
}
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
func (p *emojiParser) Trigger() []byte {
|
|
|
|
return []byte{':'}
|
|
|
|
}
|
|
|
|
|
2022-12-16 12:20:22 +01:00
|
|
|
func (p *mentionParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
|
|
before := block.PrecendingCharacter()
|
|
|
|
line, segment := block.PeekLine()
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
if !util.IsMentionOrHashtagBoundary(before) {
|
2022-12-16 12:20:22 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// unideal for performance but makes use of existing regex
|
|
|
|
loc := regexes.MentionFinder.FindIndex(line)
|
|
|
|
switch {
|
|
|
|
case loc == nil:
|
|
|
|
fallthrough
|
|
|
|
case loc[0] != 0: // fail if not found at start
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
block.Advance(loc[1])
|
|
|
|
return newMention(segment.WithStop(segment.Start + loc[1]))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
|
|
before := block.PrecendingCharacter()
|
|
|
|
line, segment := block.PeekLine()
|
|
|
|
s := string(line)
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
if !util.IsMentionOrHashtagBoundary(before) || len(s) == 1 {
|
2022-12-16 12:20:22 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, r := range s {
|
|
|
|
switch {
|
|
|
|
case r == '#' && i == 0:
|
2023-02-03 11:58:58 +01:00
|
|
|
// ignore initial #
|
2022-12-16 12:20:22 +01:00
|
|
|
continue
|
2023-02-03 11:58:58 +01:00
|
|
|
case !util.IsPlausiblyInHashtag(r) && !util.IsMentionOrHashtagBoundary(r):
|
2022-12-16 12:20:22 +01:00
|
|
|
// Fake hashtag, don't trust it
|
|
|
|
return nil
|
2023-02-03 11:58:58 +01:00
|
|
|
case util.IsMentionOrHashtagBoundary(r):
|
|
|
|
if i <= 1 {
|
|
|
|
// empty
|
|
|
|
return nil
|
|
|
|
}
|
2022-12-16 12:20:22 +01:00
|
|
|
// End of hashtag
|
|
|
|
block.Advance(i)
|
|
|
|
return newHashtag(segment.WithStop(segment.Start + i))
|
|
|
|
}
|
|
|
|
}
|
2023-02-03 11:58:58 +01:00
|
|
|
// If we don't find invalid characters before the end of the line then it's all hashtag, babey
|
|
|
|
block.Advance(segment.Len())
|
2022-12-16 12:20:22 +01:00
|
|
|
return newHashtag(segment)
|
|
|
|
}
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
func (p *emojiParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
|
|
line, segment := block.PeekLine()
|
|
|
|
|
|
|
|
// unideal for performance but makes use of existing regex
|
|
|
|
loc := regexes.EmojiFinder.FindIndex(line)
|
|
|
|
switch {
|
|
|
|
case loc == nil:
|
|
|
|
fallthrough
|
|
|
|
case loc[0] != 0: // fail if not found at start
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
block.Advance(loc[1])
|
|
|
|
return newEmoji(segment.WithStop(segment.Start + loc[1]))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-16 12:20:22 +01:00
|
|
|
// customRenderer fulfils both the renderer.NodeRenderer and goldmark.Extender interfaces.
|
2023-02-03 11:58:58 +01:00
|
|
|
// It is created in FromMarkdown and FromPlain to be used as a goldmark extension, and the
|
|
|
|
// fields are used to report tags and mentions to the caller for use as metadata.
|
2022-12-16 12:20:22 +01:00
|
|
|
type customRenderer struct {
|
2023-02-03 11:58:58 +01:00
|
|
|
f *formatter
|
|
|
|
ctx context.Context
|
|
|
|
parseMention gtsmodel.ParseMentionFunc
|
|
|
|
accountID string
|
|
|
|
statusID string
|
|
|
|
emojiOnly bool
|
|
|
|
result *FormatResult
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *customRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
|
|
|
|
reg.Register(kindMention, r.renderMention)
|
|
|
|
reg.Register(kindHashtag, r.renderHashtag)
|
2023-02-03 11:58:58 +01:00
|
|
|
reg.Register(kindEmoji, r.renderEmoji)
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *customRenderer) Extend(m goldmark.Markdown) {
|
2023-02-03 11:58:58 +01:00
|
|
|
// 1000 is set as the lowest priority, but it's arbitrary
|
2022-12-16 12:20:22 +01:00
|
|
|
m.Parser().AddOptions(parser.WithInlineParsers(
|
2023-02-03 11:58:58 +01:00
|
|
|
mdutil.Prioritized(&emojiParser{}, 1000),
|
2022-12-16 12:20:22 +01:00
|
|
|
))
|
2023-02-03 11:58:58 +01:00
|
|
|
if !r.emojiOnly {
|
|
|
|
m.Parser().AddOptions(parser.WithInlineParsers(
|
|
|
|
mdutil.Prioritized(&mentionParser{}, 1000),
|
|
|
|
mdutil.Prioritized(&hashtagParser{}, 1000),
|
|
|
|
))
|
|
|
|
}
|
2022-12-16 12:20:22 +01:00
|
|
|
m.Renderer().AddOptions(renderer.WithNodeRenderers(
|
2023-02-03 11:58:58 +01:00
|
|
|
mdutil.Prioritized(r, 1000),
|
2022-12-16 12:20:22 +01:00
|
|
|
))
|
|
|
|
}
|
|
|
|
|
|
|
|
// renderMention and renderHashtag take a mention or a hashtag ast.Node and render it as HTML.
|
|
|
|
func (r *customRenderer) renderMention(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
|
|
if !entering {
|
2023-02-03 11:58:58 +01:00
|
|
|
return ast.WalkSkipChildren, nil
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
n, ok := node.(*mention) // this function is only registered for kindMention
|
|
|
|
if !ok {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Panic(nil, "type assertion failed")
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
text := string(n.Segment.Value(source))
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
html := r.replaceMention(text)
|
2022-12-16 12:20:22 +01:00
|
|
|
|
|
|
|
// we don't have much recourse if this fails
|
|
|
|
if _, err := w.WriteString(html); err != nil {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Errorf(nil, "error writing HTML: %s", err)
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
2023-02-03 11:58:58 +01:00
|
|
|
return ast.WalkSkipChildren, nil
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *customRenderer) renderHashtag(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
|
|
if !entering {
|
2023-02-03 11:58:58 +01:00
|
|
|
return ast.WalkSkipChildren, nil
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
n, ok := node.(*hashtag) // this function is only registered for kindHashtag
|
|
|
|
if !ok {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Panic(nil, "type assertion failed")
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
|
|
|
text := string(n.Segment.Value(source))
|
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
html := r.replaceHashtag(text)
|
2022-12-16 12:20:22 +01:00
|
|
|
|
2023-02-03 11:58:58 +01:00
|
|
|
_, err := w.WriteString(html)
|
2022-12-16 12:20:22 +01:00
|
|
|
// we don't have much recourse if this fails
|
2023-02-03 11:58:58 +01:00
|
|
|
if err != nil {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Errorf(nil, "error writing HTML: %s", err)
|
2023-02-03 11:58:58 +01:00
|
|
|
}
|
|
|
|
return ast.WalkSkipChildren, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// renderEmoji doesn't turn an emoji into HTML, but adds it to the metadata.
|
|
|
|
func (r *customRenderer) renderEmoji(w mdutil.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
|
|
if !entering {
|
|
|
|
return ast.WalkSkipChildren, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
n, ok := node.(*emoji) // this function is only registered for kindEmoji
|
|
|
|
if !ok {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Panic(nil, "type assertion failed")
|
2023-02-03 11:58:58 +01:00
|
|
|
}
|
|
|
|
text := string(n.Segment.Value(source))
|
|
|
|
shortcode := text[1 : len(text)-1]
|
|
|
|
|
|
|
|
emoji, err := r.f.db.GetEmojiByShortcodeDomain(r.ctx, shortcode, "")
|
|
|
|
if err != nil {
|
|
|
|
if err != db.ErrNoEntries {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Errorf(nil, "error getting local emoji with shortcode %s: %s", shortcode, err)
|
2023-02-03 11:58:58 +01:00
|
|
|
}
|
|
|
|
} else if *emoji.VisibleInPicker && !*emoji.Disabled {
|
|
|
|
listed := false
|
|
|
|
for _, e := range r.result.Emojis {
|
|
|
|
if e.Shortcode == emoji.Shortcode {
|
|
|
|
listed = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !listed {
|
|
|
|
r.result.Emojis = append(r.result.Emojis, emoji)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we don't have much recourse if this fails
|
|
|
|
if _, err := w.WriteString(text); err != nil {
|
2023-02-17 12:02:29 +01:00
|
|
|
log.Errorf(nil, "error writing HTML: %s", err)
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|
2023-02-03 11:58:58 +01:00
|
|
|
return ast.WalkSkipChildren, nil
|
2022-12-16 12:20:22 +01:00
|
|
|
}
|