[feature] Parse content warning to HTML, serialize via client API as plaintext (#3876)

* [feature] Parse content warning as HTML, serialize via API to plaintext

* tidy up some cruft

* whoops

* oops

* i'm da joker baybee

* clemency muy lorde

* rename some of the text functions for clarity

* jiggle the opts

* fiddle de deee

* hopefully the last test fix i ever have to do in my beautiful life
This commit is contained in:
tobi
2025-03-07 15:04:34 +01:00
committed by GitHub
parent 424f62dd70
commit d8113c11e4
48 changed files with 985 additions and 635 deletions

View File

@@ -43,10 +43,10 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSAdmin() {
<pubDate>Wed, 20 Oct 2021 10:41:37 +0000</pubDate>
<lastBuildDate>Wed, 20 Oct 2021 10:41:37 +0000</lastBuildDate>
<item>
<title>open to see some puppies</title>
<title>open to see some &lt;strong&gt;puppies&lt;/strong&gt;</title>
<link>http://localhost:8080/@admin/statuses/01F8MHAAY43M6RJ473VQFCVH37</link>
<description>@admin@localhost:8080 made a new post: &#34;🐕🐕🐕🐕🐕&#34;</description>
<content:encoded><![CDATA[🐕🐕🐕🐕🐕]]></content:encoded>
<content:encoded><![CDATA[<p>🐕🐕🐕🐕🐕</p>]]></content:encoded>
<author>@admin@localhost:8080</author>
<guid isPermaLink="true">http://localhost:8080/@admin/statuses/01F8MHAAY43M6RJ473VQFCVH37</guid>
<pubDate>Wed, 20 Oct 2021 12:36:45 +0000</pubDate>
@@ -56,7 +56,7 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSAdmin() {
<title>hello world! #welcome ! first post on the instance :rainbow: !</title>
<link>http://localhost:8080/@admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R</link>
<description>@admin@localhost:8080 posted 1 attachment: &#34;hello world! #welcome ! first post on the instance :rainbow: !&#34;</description>
<content:encoded><![CDATA[hello world! #welcome ! first post on the instance <img src="http://localhost:8080/fileserver/01AY6P665V14JJR0AFVRT7311Y/emoji/original/01F8MH9H8E4VG3KDYJR9EGPXCQ.png" title=":rainbow:" alt=":rainbow:" width="25" height="25" /> !]]></content:encoded>
<content:encoded><![CDATA[<p>hello world! <a href="http://localhost:8080/tags/welcome" class="mention hashtag" rel="tag nofollow noreferrer noopener" target="_blank">#<span>welcome</span></a> ! first post on the instance <img src="http://localhost:8080/fileserver/01AY6P665V14JJR0AFVRT7311Y/emoji/original/01F8MH9H8E4VG3KDYJR9EGPXCQ.png" title=":rainbow:" alt=":rainbow:" width="25" height="25" /> !</p>]]></content:encoded>
<author>@admin@localhost:8080</author>
<enclosure url="http://localhost:8080/fileserver/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg" length="62529" type="image/jpeg"></enclosure>
<guid isPermaLink="true">http://localhost:8080/@admin/statuses/01F8MH75CBF9JFX4ZAD54N0W0R</guid>
@@ -145,7 +145,7 @@ func (suite *GetRSSTestSuite) TestGetAccountRSSZork() {
<title>introduction post</title>
<link>http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY</link>
<description>@the_mighty_zork@localhost:8080 made a new post: &#34;hello everyone!&#34;</description>
<content:encoded><![CDATA[hello everyone!]]></content:encoded>
<content:encoded><![CDATA[<p>hello everyone!</p>]]></content:encoded>
<author>@the_mighty_zork@localhost:8080</author>
<guid isPermaLink="true">http://localhost:8080/@the_mighty_zork/statuses/01F8MHAMCHF6Y650WCRSCP4WMY</guid>
<pubDate>Wed, 20 Oct 2021 10:40:37 +0000</pubDate>

View File

@@ -97,8 +97,8 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
// Parse new display name (always from plaintext).
account.DisplayName = text.SanitizeToPlaintext(displayName)
// HTML tags not allowed in display name.
account.DisplayName = text.StripHTMLFromText(displayName)
acctColumns = append(acctColumns, "display_name")
}
@@ -145,7 +145,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
}
if form.AvatarDescription != nil {
desc := text.SanitizeToPlaintext(*form.AvatarDescription)
desc := text.StripHTMLFromText(*form.AvatarDescription)
form.AvatarDescription = &desc
}
@@ -175,7 +175,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
}
if form.HeaderDescription != nil {
desc := text.SanitizeToPlaintext(*form.HeaderDescription)
desc := text.StripHTMLFromText(*form.HeaderDescription)
form.HeaderDescription = util.Ptr(desc)
}
@@ -265,7 +265,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
account.Settings.CustomCSS = text.SanitizeToPlaintext(customCSS)
account.Settings.CustomCSS = text.StripHTMLFromText(customCSS)
settingsColumns = append(settingsColumns, "custom_css")
}
@@ -356,8 +356,8 @@ func (p *Processor) updateFields(
// Sanitize raw field values.
fieldRaw := &gtsmodel.Field{
Name: text.SanitizeToPlaintext(name),
Value: text.SanitizeToPlaintext(value),
Name: text.StripHTMLFromText(name),
Value: text.StripHTMLFromText(value),
}
fieldsRaw = append(fieldsRaw, fieldRaw)
}
@@ -385,7 +385,7 @@ func (p *Processor) processAccountText(
emojis := make(map[string]*gtsmodel.Emoji)
// Retrieve display name emojis.
for _, emoji := range p.formatter.FromPlainEmojiOnly(
for _, emoji := range p.formatter.FromPlainBasic(
ctx,
p.parseMention,
account.ID,
@@ -413,7 +413,7 @@ func (p *Processor) processAccountText(
// Name stays plain, but we still need to
// see if there are any emojis set in it.
field.Name = fieldRaw.Name
for _, emoji := range p.formatter.FromPlainEmojiOnly(
for _, emoji := range p.formatter.FromPlainBasic(
ctx,
p.parseMention,
account.ID,

View File

@@ -53,8 +53,8 @@ func (p *Processor) createDomainAllow(
ID: id.NewULID(),
Domain: domain,
CreatedByAccountID: adminAcct.ID,
PrivateComment: text.SanitizeToPlaintext(privateComment),
PublicComment: text.SanitizeToPlaintext(publicComment),
PrivateComment: text.StripHTMLFromText(privateComment),
PublicComment: text.StripHTMLFromText(publicComment),
Obfuscate: &obfuscate,
SubscriptionID: subscriptionID,
}

View File

@@ -53,8 +53,8 @@ func (p *Processor) createDomainBlock(
ID: id.NewULID(),
Domain: domain,
CreatedByAccountID: adminAcct.ID,
PrivateComment: text.SanitizeToPlaintext(privateComment),
PublicComment: text.SanitizeToPlaintext(publicComment),
PrivateComment: text.StripHTMLFromText(privateComment),
PublicComment: text.StripHTMLFromText(publicComment),
Obfuscate: &obfuscate,
SubscriptionID: subscriptionID,
}

View File

@@ -165,7 +165,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
}
// Don't allow html in site title.
instance.Title = text.SanitizeToPlaintext(title)
instance.Title = text.StripHTMLFromText(title)
columns = append(columns, "title")
}
@@ -235,7 +235,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
return nil, gtserror.NewErrorBadRequest(err, err.Error())
}
instance.CustomCSS = text.SanitizeToPlaintext(customCSS)
instance.CustomCSS = text.StripHTMLFromText(customCSS)
columns = append(columns, []string{"custom_css"}...)
}

View File

@@ -87,7 +87,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, media
// processDescription will sanitize and validate description against server configuration.
func processDescription(description string) (string, gtserror.WithCode) {
description = text.SanitizeToPlaintext(description)
description = text.StripHTMLFromText(description)
chars := len([]rune(description))
if min := config.GetMediaDescriptionMinChars(); chars < min {

View File

@@ -171,19 +171,25 @@ func (p *Processor) processContent(
)
}
// format is the currently set text formatting
// function, according to the provided content-type.
var format text.FormatFunc
var (
// format is the currently set text formatting
// function, according to the provided content-type.
format text.FormatFunc
// formatCW is like format, but for content warning.
formatCW text.FormatFunc
)
switch contentType {
// Format status according to text/plain.
case gtsmodel.StatusContentTypePlain:
format = p.formatter.FromPlain
formatCW = p.formatter.FromPlainBasic
// Format status according to text/markdown.
case gtsmodel.StatusContentTypeMarkdown:
format = p.formatter.FromMarkdown
formatCW = p.formatter.FromMarkdownBasic
// Unknown.
default:
@@ -215,26 +221,23 @@ func (p *Processor) processContent(
status.Emojis = contentRes.Emojis
status.Tags = contentRes.Tags
// From here-on-out just use emoji-only
// plain-text formatting as the FormatFunc.
format = p.formatter.FromPlainEmojiOnly
// Sanitize content warning and format.
warning := text.SanitizeToPlaintext(contentWarning)
warningRes := formatInput(format, warning)
cwRes := formatInput(formatCW, contentWarning)
// Gather results of the formatted.
status.ContentWarning = warningRes.HTML
status.Emojis = append(status.Emojis, warningRes.Emojis...)
status.ContentWarning = cwRes.HTML
status.Emojis = append(status.Emojis, cwRes.Emojis...)
if poll != nil {
// Pre-allocate slice of poll options of expected length.
status.PollOptions = make([]string, len(poll.Options))
for i, option := range poll.Options {
// Sanitize each poll option and format.
option = text.SanitizeToPlaintext(option)
optionRes := formatInput(format, option)
// Strip each poll option and format.
//
// For polls just use basic formatting.
option = text.StripHTMLFromText(option)
optionRes := formatInput(p.formatter.FromPlainBasic, option)
// Gather results of the formatted.
status.PollOptions[i] = optionRes.HTML

View File

@@ -189,6 +189,13 @@ func (p *Processor) Create(
PendingApproval: util.Ptr(false),
}
// Only store ContentWarningText if the parsed
// result is different from the given SpoilerText,
// otherwise skip to avoid duplicating db columns.
if content.ContentWarning != form.SpoilerText {
status.ContentWarningText = form.SpoilerText
}
if backfill {
// Ensure backfilled status contains no
// mentions to anyone other than author.

View File

@@ -60,33 +60,6 @@ func (suite *StatusCreateTestSuite) TestProcessContentWarningWithQuotationMarks(
suite.Equal("\"test\"", apiStatus.SpoilerText)
}
func (suite *StatusCreateTestSuite) TestProcessContentWarningWithHTMLEscapedQuotationMarks() {
ctx := context.Background()
creatingAccount := suite.testAccounts["local_account_1"]
creatingApplication := suite.testApplications["application_1"]
statusCreateForm := &apimodel.StatusCreateRequest{
Status: "poopoo peepee",
MediaIDs: []string{},
Poll: nil,
InReplyToID: "",
Sensitive: false,
SpoilerText: "&#34test&#34", // the html-escaped quotation marks should appear as normal quotation marks in the finished text
Visibility: apimodel.VisibilityPublic,
LocalOnly: util.Ptr(false),
ScheduledAt: nil,
Language: "en",
ContentType: apimodel.StatusContentTypePlain,
}
apiStatus, err := suite.status.Create(ctx, creatingAccount, creatingApplication, statusCreateForm)
suite.NoError(err)
suite.NotNil(apiStatus)
suite.Equal("\"test\"", apiStatus.SpoilerText)
}
func (suite *StatusCreateTestSuite) TestProcessStatusMarkdownWithUnderscoreEmoji() {
ctx := context.Background()

View File

@@ -50,6 +50,13 @@ func (p *Processor) Delete(ctx context.Context, requestingAccount *gtsmodel.Acco
return nil, errWithCode
}
// Replace content warning with raw
// version if it's available, to make
// delete + redraft work nicer.
if targetStatus.ContentWarningText != "" {
apiStatus.SpoilerText = targetStatus.ContentWarningText
}
// Process delete side effects.
p.state.Workers.Client.Queue.Push(&messages.FromClientAPI{
APObjectType: ap.ObjectNote,

View File

@@ -301,7 +301,7 @@ func (p *Processor) Edit(
// update the other necessary status fields.
status.Content = content.Content
status.ContentWarning = content.ContentWarning
status.Text = form.Status
status.Text = form.Status // raw
status.ContentType = contentType
status.Language = content.Language
status.Sensitive = &form.Sensitive
@@ -309,6 +309,13 @@ func (p *Processor) Edit(
status.Attachments = media
status.EditedAt = now
// Only store ContentWarningText if the parsed
// result is different from the given SpoilerText,
// otherwise skip to avoid duplicating db columns.
if content.ContentWarning != form.SpoilerText {
status.ContentWarningText = form.SpoilerText
}
if poll != nil {
// Set relevant fields for latest with poll.
status.ActivityStreamsType = ap.ActivityQuestion

View File

@@ -53,10 +53,21 @@ func (p *Processor) SourceGet(ctx context.Context, requester *gtsmodel.Account,
"target status not found",
)
}
// Try to use unparsed content
// warning text if available,
// fall back to parsed cw html.
var spoilerText string
if status.ContentWarningText != "" {
spoilerText = status.ContentWarningText
} else {
spoilerText = status.ContentWarning
}
return &apimodel.StatusSource{
ID: status.ID,
Text: status.Text,
SpoilerText: status.ContentWarning,
SpoilerText: spoilerText,
ContentType: typeutils.ContentTypeToAPIContentType(status.ContentType),
}, nil
}

View File

@@ -71,7 +71,7 @@ func (suite *StatusUpdateTestSuite) TestStreamNotification() {
"muted": false,
"bookmarked": false,
"pinned": false,
"content": "dark souls status bot: \"thoughts of dog\"",
"content": "\u003cp\u003edark souls status bot: \"thoughts of dog\"\u003c/p\u003e",
"reblog": null,
"account": {
"id": "01F8MH5ZK5VRH73AKHQM6Y9VNX",

View File

@@ -122,7 +122,7 @@ func (p *Processor) Create(
Username: form.Username,
Email: form.Email,
Password: form.Password,
Reason: text.SanitizeToPlaintext(reason),
Reason: text.StripHTMLFromText(reason),
SignUpIP: form.IP,
Locale: form.Locale,
AppID: app.ID,