[bugfix] Fix existing bio text showing as HTML (#531)
* fix existing bio text showing as HTML
  - updated replaced mentions to include instance
  - strips HTML from account source note in Verify handler
  - update text formatter to use buffers for string writes
* go away linter
* change buf reset location, change html mention tags
* reduce FindLinks code complexity
* fix HTML to text conversion
* Update internal/regexes/regexes.go
* use improved html2text lib with more options
* fix to produce actual plaintext from html
* fix span tags instead written as space
* performance improvements to regex replacements, fix link replace logic for un-html-ing in the future
* fix tag/mention replacements to use input string, fix link replace to not include scheme
* use matched input string for link replace href text
* remove unused code (to appease linter :sobs:)
* improve hashtagFinger regex to be more compliant
* update breakReplacer to include both unix and windows line endings
* add NoteRaw field to Account to store plaintext account bio, add migration for this, set for sensitive accounts
* drop unnecessary code
* update text package tests to fix logic changes
* add raw note content testing to account update and account verify
* remove unused modules
* fix emoji regex
* fix replacement of hashtags
* update code comment

Signed-off-by: kim <grufwub@gmail.com>
Co-authored-by: Mina Galić <mina.galic@puppet.com>
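Several of the bullets above ("fix HTML to text conversion", "fix to produce actual plaintext from html") come down to converting the stored HTML bio back into plaintext before it is handed back as the account's source note. As a rough illustration of that transformation only: the sketch below walks the HTML with the golang.org/x/net/html tokenizer and keeps just text nodes and line breaks. It is not the html2text library the commit message says the project adopted, and htmlToPlaintext is a made-up name.

// Minimal sketch of HTML-bio -> plaintext conversion. Illustration only;
// the commit itself uses an improved html2text library instead.
package main

import (
	"fmt"
	"strings"

	"golang.org/x/net/html"
)

func htmlToPlaintext(in string) string {
	tok := html.NewTokenizer(strings.NewReader(in))
	var b strings.Builder
	for {
		switch tok.Next() {
		case html.ErrorToken:
			// io.EOF or a parse error; either way, stop and return what we have.
			return strings.TrimSpace(b.String())
		case html.TextToken:
			// keep the unescaped text content
			b.Write(tok.Text())
		case html.StartTagToken, html.SelfClosingTagToken:
			if name, _ := tok.TagName(); string(name) == "br" {
				b.WriteString("\n")
			}
		case html.EndTagToken:
			// end of a paragraph becomes a blank line
			if name, _ := tok.TagName(); string(name) == "p" {
				b.WriteString("\n\n")
			}
		}
	}
}

func main() {
	bio := `<p>hello <span class="h-card"><a href="https://example.org/@kim">@kim</a></span></p><p>plain text, not HTML</p>`
	fmt.Println(htmlToPlaintext(bio))
}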
@@ -19,34 +19,28 @@
 package text
 
 import (
+    "bytes"
     "context"
-    "fmt"
     "net/url"
+    "strings"
 
-    "mvdan.cc/xurls/v2"
+    "github.com/superseriousbusiness/gotosocial/internal/regexes"
 )
 
-// schemes is the regex for schemes we accept when looking for links.
-// Basically, we accept https or http.
-var schemes = `(((http|https))://)`
-
 // FindLinks parses the given string looking for recognizable URLs (including scheme).
 // It returns a list of those URLs, without changing the string, or an error if something goes wrong.
 // If no URLs are found within the given string, an empty slice and nil will be returned.
-func FindLinks(in string) ([]*url.URL, error) {
-    rxStrict, err := xurls.StrictMatchingScheme(schemes)
-    if err != nil {
-        return nil, err
-    }
-
-    urls := []*url.URL{}
+func FindLinks(in string) []*url.URL {
+    var urls []*url.URL
 
     // bail already if we don't find anything
-    found := rxStrict.FindAllString(in, -1)
+    found := regexes.LinkScheme.FindAllString(in, -1)
     if len(found) == 0 {
-        return urls, nil
+        return nil
     }
 
+    urlmap := map[string]struct{}{}
+
     // for each string we find, we want to parse it into a URL if we can
     // if we fail to parse it, just ignore this match and continue
     for _, f := range found {
@@ -54,29 +48,18 @@ func FindLinks(in string) ([]*url.URL, error) {
         if err != nil {
             continue
         }
-        urls = append(urls, u)
-    }
 
-    // deduplicate the URLs
-    urlsDeduped := []*url.URL{}
+        // Calculate string
+        ustr := u.String()
 
-    for _, u := range urls {
-        if !contains(urlsDeduped, u) {
-            urlsDeduped = append(urlsDeduped, u)
+        if _, ok := urlmap[ustr]; !ok {
+            // Has not been encountered yet
+            urls = append(urls, u)
+            urlmap[ustr] = struct{}{}
         }
     }
 
-    return urlsDeduped, nil
-}
-
-// contains checks if the given url is already within a slice of URLs
-func contains(urls []*url.URL, url *url.URL) bool {
-    for _, u := range urls {
-        if u.String() == url.String() {
-            return true
-        }
-    }
-    return false
+    return urls
 }
 
 // ReplaceLinks replaces all detected links in a piece of text with their HTML (href) equivalents.
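The reworked FindLinks above drops the error return and the separate contains() helper: each regex match is parsed, keyed on u.String() in a set, and appended only the first time it is seen. A self-contained sketch of the same pattern follows; the hard-coded found slice stands in for what regexes.LinkScheme.FindAllString would return, and dedupeLinks is a hypothetical name used only for illustration.

// Standalone illustration of the map-based de-duplication used by the new FindLinks.
package main

import (
	"fmt"
	"net/url"
)

func dedupeLinks(found []string) []*url.URL {
	var urls []*url.URL
	urlmap := map[string]struct{}{}

	for _, f := range found {
		u, err := url.Parse(f)
		if err != nil {
			// unparseable match: skip it, just like FindLinks does
			continue
		}

		ustr := u.String()
		if _, ok := urlmap[ustr]; !ok {
			// not encountered yet: keep it and remember it
			urls = append(urls, u)
			urlmap[ustr] = struct{}{}
		}
	}

	return urls
}

func main() {
	found := []string{
		"https://example.org/@kim",
		"https://example.org/@kim", // duplicate, dropped
		"http://example.com/path?q=1",
	}
	for _, u := range dedupeLinks(found) {
		fmt.Println(u.String())
	}
}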
@@ -84,33 +67,20 @@ func contains(urls []*url.URL, url *url.URL) bool {
 // href will end up double-formatted, if the text you pass here contains one or more hrefs already.
 // To avoid this, you should sanitize any HTML out of text before you pass it into this function.
 func (f *formatter) ReplaceLinks(ctx context.Context, in string) string {
-    rxStrict, err := xurls.StrictMatchingScheme(schemes)
-    if err != nil {
-        panic(err)
-    }
-
-    replaced := rxStrict.ReplaceAllStringFunc(in, func(urlString string) string {
+    return regexes.ReplaceAllStringFunc(regexes.LinkScheme, in, func(urlString string, buf *bytes.Buffer) string {
         thisURL, err := url.Parse(urlString)
         if err != nil {
             return urlString // we can't parse it as a URL so don't replace it
         }
-
-        shortString := thisURL.Hostname()
-
-        if thisURL.Path != "" {
-            shortString += thisURL.Path
-        }
-
-        if thisURL.Fragment != "" {
-            shortString = shortString + "#" + thisURL.Fragment
-        }
-
-        if thisURL.RawQuery != "" {
-            shortString = shortString + "?" + thisURL.RawQuery
-        }
-
-        replacement := fmt.Sprintf(`<a href="%s" rel="noopener">%s</a>`, urlString, shortString)
-        return replacement
+        // <a href="thisURL.String()" rel="noopener">urlString</a>
+        urlString = thisURL.String()
+        buf.WriteString(`<a href="`)
+        buf.WriteString(thisURL.String())
+        buf.WriteString(`" rel="noopener">`)
+        urlString = strings.TrimPrefix(urlString, thisURL.Scheme)
+        urlString = strings.TrimPrefix(urlString, "://")
+        buf.WriteString(urlString)
+        buf.WriteString(`</a>`)
+        return buf.String()
     })
-    return replaced
 }
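The new ReplaceLinks delegates to regexes.ReplaceAllStringFunc, which is changed elsewhere in this commit and not shown in this hunk. Judging only from the call site, it hands the callback a scratch bytes.Buffer to write each replacement into, rather than building a fresh string per match with fmt.Sprintf. Below is a guess at a minimal version of such a helper under that assumption; the real one presumably pools and resets buffers more carefully, and the linkScheme regex here is only a stand-in for regexes.LinkScheme.

// A minimal sketch, not gotosocial's actual internal/regexes helper.
package main

import (
	"bytes"
	"fmt"
	"regexp"
)

// ReplaceAllStringFunc is like (*regexp.Regexp).ReplaceAllStringFunc, but hands
// the callback a scratch buffer that is reset before each match, so each
// replacement is built with buffer writes instead of string concatenation.
func ReplaceAllStringFunc(rgx *regexp.Regexp, src string, replace func(match string, buf *bytes.Buffer) string) string {
	var buf bytes.Buffer // the real helper presumably draws this from a pool
	return rgx.ReplaceAllStringFunc(src, func(match string) string {
		buf.Reset()
		return replace(match, &buf)
	})
}

func main() {
	linkScheme := regexp.MustCompile(`https?://\S+`) // stand-in for regexes.LinkScheme
	out := ReplaceAllStringFunc(linkScheme, "see https://example.org/@kim", func(m string, buf *bytes.Buffer) string {
		buf.WriteString(`<a href="`)
		buf.WriteString(m)
		buf.WriteString(`" rel="noopener">`)
		buf.WriteString(m)
		buf.WriteString(`</a>`)
		return buf.String()
	})
	fmt.Println(out)
}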