[bugfix] Use better plaintext representation of status for filtering (#3301)

* [bugfix] Use better plaintext representation of status for filtering

* add new deps to readme

* lint

* update tests

* update regexes

* address review comments

* remove now unused xxhash

* whoops, wrong logger

* Merge branch 'main' into status_filtering_bugfix

* put cache in caches struct

* pain
This commit is contained in:
tobi
2024-09-16 14:00:23 +02:00
committed by GitHub
parent 6dd936fbe1
commit efd1a4f717
15 changed files with 2685 additions and 64 deletions

View File

@@ -20,6 +20,8 @@ package gtsmodel
import (
"regexp"
"time"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
// Filter stores a filter created by a local account.
@@ -61,14 +63,23 @@ type FilterKeyword struct {
// Compile will compile this FilterKeyword as a prepared regular expression.
//
// The keyword is escaped with regexp.QuoteMeta so that it matches literally,
// and the pattern is prefixed with (?i) so matching is case-insensitive. The
// result of regexp.Compile is stored in k.Regexp and the compile error, if
// any, is returned unwrapped.
//
// NOTE(review): this listing looks like a rendered diff hunk in which removed
// and added lines are shown together without +/- markers. As written the body
// is not compilable (the first `if` is never closed and k.Regexp is assigned
// twice). The notes below mark which side each group of lines appears to
// belong to -- confirm against the actual commit.
func (k *FilterKeyword) Compile() (err error) {
// NOTE(review): presumably the pre-change side: one `\b` delimiter reused on
// both ends of the keyword when WholeWord is non-nil and true.
var wordBreak string
if k.WholeWord != nil && *k.WholeWord {
wordBreak = `\b`
// NOTE(review): presumably the post-change side: separate start and end
// delimiters, both left as the empty string unless WholeWord is set.
var (
wordBreakStart string
wordBreakEnd string
)
// NOTE(review): util.PtrOrZero is defined outside this view; presumably it
// yields false for a nil pointer, mirroring the explicit nil check above --
// verify against the util package.
if util.PtrOrZero(k.WholeWord) {
// Either word boundary or whitespace or start of text. Only the (?i) flag
// is set below, so ^ anchors to the beginning of the whole text rather than
// to each line. When the \s alternative is taken, that whitespace character
// is part of the match.
wordBreakStart = `(?:\b|\s|^)`
// Either word boundary or whitespace or end of text. As above, $ anchors
// to the end of the whole text because multi-line mode is not enabled.
wordBreakEnd = `(?:\b|\s|$)`
}
// Compile keyword filter regexp.
// QuoteMeta escapes regexp metacharacters so the keyword is matched literally.
quoted := regexp.QuoteMeta(k.Keyword)
// NOTE(review): presumably the pre-change assignment (uses wordBreak).
k.Regexp, err = regexp.Compile(`(?i)` + wordBreak + quoted + wordBreak)
// NOTE(review): presumably the post-change assignment (uses the separate
// start and end delimiters).
k.Regexp, err = regexp.Compile(`(?i)` + wordBreakStart + quoted + wordBreakEnd)
return // caller is expected to wrap this error
}