mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bugfix] Use better plaintext representation of status for filtering (#3301)
* [bugfix] Use better plaintext representation of status for filtering
* add new deps to readme
* lint
* update tests
* update regexes
* address review comments
* remove now unused xxhash
* whoops, wrong logger
* Merge branch 'main' into status_filtering_bugfix
* put cache in caches struct
* pain
This commit is contained in:
@ -27,6 +27,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/k3a/html2text"
|
||||
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/config"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
||||
@ -284,3 +285,64 @@ func ContentToContentLanguage(
|
||||
|
||||
return contentStr, langTagStr
|
||||
}
|
||||
|
||||
// filterableFields returns text fields from
|
||||
// a status that we might want to filter on:
|
||||
//
|
||||
// - content warning
|
||||
// - content (converted to plaintext from HTML)
|
||||
// - media descriptions
|
||||
// - poll options
|
||||
//
|
||||
// Each field should be filtered separately.
|
||||
// This avoids scenarios where false-positive
|
||||
// multiple-word matches can be made by matching
|
||||
// the last word of one field + the first word
|
||||
// of the next field together.
|
||||
func filterableFields(s *gtsmodel.Status) []string {
|
||||
// Estimate length of fields.
|
||||
fieldCount := 2 + len(s.Attachments)
|
||||
if s.Poll != nil {
|
||||
fieldCount += len(s.Poll.Options)
|
||||
}
|
||||
fields := make([]string, 0, fieldCount)
|
||||
|
||||
// Content warning / title.
|
||||
if s.ContentWarning != "" {
|
||||
fields = append(fields, s.ContentWarning)
|
||||
}
|
||||
|
||||
// Status content. Though we have raw text
|
||||
// available for statuses created on our
|
||||
// instance, use the html2text version to
|
||||
// remove markdown-formatting characters
|
||||
// and ensure more consistent filtering.
|
||||
if s.Content != "" {
|
||||
text := html2text.HTML2TextWithOptions(
|
||||
s.Content,
|
||||
html2text.WithLinksInnerText(),
|
||||
html2text.WithUnixLineBreaks(),
|
||||
)
|
||||
if text != "" {
|
||||
fields = append(fields, text)
|
||||
}
|
||||
}
|
||||
|
||||
// Media descriptions.
|
||||
for _, attachment := range s.Attachments {
|
||||
if attachment.Description != "" {
|
||||
fields = append(fields, attachment.Description)
|
||||
}
|
||||
}
|
||||
|
||||
// Poll options.
|
||||
if s.Poll != nil {
|
||||
for _, opt := range s.Poll.Options {
|
||||
if opt != "" {
|
||||
fields = append(fields, opt)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return fields
|
||||
}
|
||||
|
Reference in New Issue
Block a user