mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bugfix] Fix existing bio text showing as HTML (#531)
* fix existing bio text showing as HTML - updated replaced mentions to include instance - strips HTML from account source note in Verify handler - update text formatter to use buffers for string writes Signed-off-by: kim <grufwub@gmail.com> * go away linter Signed-off-by: kim <grufwub@gmail.com> * change buf reset location, change html mention tags Signed-off-by: kim <grufwub@gmail.com> * reduce FindLinks code complexity Signed-off-by: kim <grufwub@gmail.com> * fix HTML to text conversion Signed-off-by: kim <grufwub@gmail.com> * Update internal/regexes/regexes.go Co-authored-by: Mina Galić <mina.galic@puppet.com> * use improved html2text lib with more options Signed-off-by: kim <grufwub@gmail.com> * fix to produce actual plaintext from html Signed-off-by: kim <grufwub@gmail.com> * fix span tags instead written as space Signed-off-by: kim <grufwub@gmail.com> * performance improvements to regex replacements, fix link replace logic for un-html-ing in the future Signed-off-by: kim <grufwub@gmail.com> * fix tag/mention replacements to use input string, fix link replace to not include scheme Signed-off-by: kim <grufwub@gmail.com> * use matched input string for link replace href text Signed-off-by: kim <grufwub@gmail.com> * remove unused code (to appease linter :sobs:) Signed-off-by: kim <grufwub@gmail.com> * improve hashtagFinger regex to be more compliant Signed-off-by: kim <grufwub@gmail.com> * update breakReplacer to include both unix and windows line endings Signed-off-by: kim <grufwub@gmail.com> * add NoteRaw field to Account to store plaintext account bio, add migration for this, set for sensitive accounts Signed-off-by: kim <grufwub@gmail.com> * drop unnecessary code Signed-off-by: kim <grufwub@gmail.com> * update text package tests to fix logic changes Signed-off-by: kim <grufwub@gmail.com> * add raw note content testing to account update and account verify Signed-off-by: kim <grufwub@gmail.com> * remove unused modules Signed-off-by: kim <grufwub@gmail.com> * fix 
emoji regex Signed-off-by: kim <grufwub@gmail.com> * fix replacement of hashtags Signed-off-by: kim <grufwub@gmail.com> * update code comment Signed-off-by: kim <grufwub@gmail.com> Co-authored-by: Mina Galić <mina.galic@puppet.com>
This commit is contained in:
@@ -19,8 +19,12 @@
|
||||
package regexes
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sync"
|
||||
|
||||
"mvdan.cc/xurls/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -47,6 +51,16 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
schemes = `(http|https)://`
|
||||
// LinkScheme matches URLs in text, restricted to those that start with an http:// or https:// scheme.
|
||||
LinkScheme = func() *regexp.Regexp {
|
||||
rgx, err := xurls.StrictMatchingScheme(schemes)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return rgx
|
||||
}()
|
||||
|
||||
mentionName = `^@(\w+)(?:@([a-zA-Z0-9_\-\.:]+))?$`
|
||||
// MentionName captures the username and domain part from a mention string
|
||||
// such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols)
|
||||
@@ -58,7 +72,7 @@ var (
|
||||
MentionFinder = regexp.MustCompile(mentionFinder)
|
||||
|
||||
// hashtag regex can be played with here: https://regex101.com/r/bPxeca/1
|
||||
hashtagFinder = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength)
|
||||
hashtagFinder = fmt.Sprintf(`(?:^|\s)(?:#*)(#[a-zA-Z0-9]{1,%d})(?:#|\b)`, maximumHashtagLength)
|
||||
// HashtagFinder finds possible hashtags in a string.
|
||||
// Its single capture group holds the hashtag text including the leading # symbol; callers must trim the # themselves.
|
||||
HashtagFinder = regexp.MustCompile(hashtagFinder)
|
||||
@@ -68,7 +82,7 @@ var (
|
||||
EmojiShortcode = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcode))
|
||||
|
||||
// emoji regex can be played with here: https://regex101.com/r/478XGM/1
|
||||
emojiFinderString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcode)
|
||||
emojiFinderString = fmt.Sprintf(`(?:\b)?:(%s):(?:\b)?`, emojiShortcode)
|
||||
// EmojiFinder extracts emoji strings from a piece of text.
|
||||
EmojiFinder = regexp.MustCompile(emojiFinderString)
|
||||
|
||||
@@ -134,3 +148,21 @@ var (
|
||||
// from eg /users/example_username/blocks/01F7XT5JZW1WMVSW1KADS8PVDH
|
||||
BlockPath = regexp.MustCompile(blockPath)
|
||||
)
|
||||
|
||||
// bufpool is a memory pool of byte buffers for use in our regex utility functions.
// Fresh buffers start with 512 bytes of capacity.
var bufpool = sync.Pool{
	New: func() any {
		return bytes.NewBuffer(make([]byte, 0, 512))
	},
}

// ReplaceAllStringFunc calls through to rgx.ReplaceAllStringFunc, handing repl
// a clean, reusable byte buffer for optimized string writes.
//
// For every match of rgx in src, repl is called with the matched text and a
// buffer that has just been reset. The string repl returns replaces the match
// in the result. If rgx never matches, repl is not called and src is returned
// as produced by regexp.
//
// The buffer is shared between all matches of a single call and goes back to
// the pool when the call returns, so repl must not keep a reference to buf
// (or to the slice returned by buf.Bytes()) after it returns.
func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, repl func(match string, buf *bytes.Buffer) string) string {
	// maxPooledCap is the largest buffer we are willing to keep in the pool.
	// Without a cap, a single huge replacement would leave its oversized
	// backing array in the pool for later callers.
	const maxPooledCap = 64 << 10

	// Checked assertion: the pool only ever holds *bytes.Buffer, but falling
	// back to a fresh buffer is cheaper than a panic if that ever changes.
	buf, ok := bufpool.Get().(*bytes.Buffer)
	if !ok {
		buf = bytes.NewBuffer(make([]byte, 0, 512))
	}

	defer func() {
		if buf.Cap() > maxPooledCap {
			// Drop outliers and let the garbage collector reclaim them.
			return
		}
		buf.Reset()
		bufpool.Put(buf)
	}()

	return rgx.ReplaceAllStringFunc(src, func(match string) string {
		buf.Reset() // each match starts from an empty buffer
		return repl(match, buf)
	})
}
|
||||
|
Reference in New Issue
Block a user