From a940a520d301d00f42012743b3999a73f7180848 Mon Sep 17 00:00:00 2001 From: Tobi Smethurst <31960611+tsmethurst@users.noreply.github.com> Date: Thu, 29 Jul 2021 13:18:22 +0200 Subject: [PATCH] Link hashtag bug (#121) * link + hashtag bug * remove printlns * tidy up some duplicated code --- .../api/client/status/statuscreate_test.go | 78 +++++++++++++---- internal/text/common.go | 31 ++++++- internal/text/formatter.go | 7 ++ internal/text/formatter_test.go | 51 +++++++++++ internal/text/link.go | 2 +- internal/text/link_test.go | 64 ++++++++++---- internal/text/markdown.go | 20 +---- internal/text/plain.go | 20 ++--- internal/text/plain_test.go | 84 +++++++++++++++++++ internal/text/sanitize.go | 14 +++- internal/util/regexes.go | 17 ++-- internal/util/statustools.go | 24 ++++-- internal/util/statustools_test.go | 15 +++- internal/util/validation_test.go | 18 ++-- testrig/testmodels.go | 1 + 15 files changed, 349 insertions(+), 97 deletions(-) create mode 100644 internal/text/formatter_test.go create mode 100644 internal/text/plain_test.go diff --git a/internal/api/client/status/statuscreate_test.go b/internal/api/client/status/statuscreate_test.go index b19323869..dd4a4386b 100644 --- a/internal/api/client/status/statuscreate_test.go +++ b/internal/api/client/status/statuscreate_test.go @@ -57,6 +57,7 @@ func (suite *StatusCreateTestSuite) SetupTest() { suite.db = testrig.NewTestDB() suite.storage = testrig.NewTestStorage() suite.log = testrig.NewTestLog() + suite.tc = testrig.NewTestTypeConverter(suite.db) suite.federator = testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil)), suite.storage) suite.processor = testrig.NewTestProcessor(suite.db, suite.storage, suite.federator) suite.statusModule = status.New(suite.config, suite.processor, suite.log).(*status.Module) @@ -69,6 +70,14 @@ func (suite *StatusCreateTestSuite) TearDownTest() { testrig.StandardStorageTeardown(suite.storage) } +var statusWithLinksAndTags = `#test alright, should be able to post #links with fragments in them now, let's see........ + +https://docs.gotosocial.org/en/latest/user_guide/posts/#links + +#gotosocial + +(tobi remember to pull the docker image challenge)` + // Post a new status with some custom visibility settings func (suite *StatusCreateTestSuite) TestPostNewStatus() { @@ -109,7 +118,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatus() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "hello hello", statusReply.SpoilerText) - assert.Equal(suite.T(), "this is a brand new status! #helloworld", statusReply.Content) + assert.Equal(suite.T(), "

this is a brand new status! #helloworld

", statusReply.Content) assert.True(suite.T(), statusReply.Sensitive) assert.Equal(suite.T(), model.VisibilityPrivate, statusReply.Visibility) assert.Len(suite.T(), statusReply.Tags, 1) @@ -124,6 +133,43 @@ func (suite *StatusCreateTestSuite) TestPostNewStatus() { assert.Equal(suite.T(), statusReply.Account.ID, gtsTag.FirstSeenFromAccountID) } +func (suite *StatusCreateTestSuite) TestPostAnotherNewStatus() { + + t := suite.testTokens["local_account_1"] + oauthToken := oauth.TokenToOauthToken(t) + + // setup + recorder := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(recorder) + ctx.Set(oauth.SessionAuthorizedApplication, suite.testApplications["application_1"]) + ctx.Set(oauth.SessionAuthorizedToken, oauthToken) + ctx.Set(oauth.SessionAuthorizedUser, suite.testUsers["local_account_1"]) + ctx.Set(oauth.SessionAuthorizedAccount, suite.testAccounts["local_account_1"]) + ctx.Request = httptest.NewRequest(http.MethodPost, fmt.Sprintf("http://localhost:8080/%s", status.BasePath), nil) // the endpoint we're hitting + ctx.Request.Form = url.Values{ + "status": {statusWithLinksAndTags}, + } + suite.statusModule.StatusCreatePOSTHandler(ctx) + + // check response + + // 1. we should have OK from our call to the function + suite.EqualValues(http.StatusOK, recorder.Code) + + result := recorder.Result() + defer result.Body.Close() + b, err := ioutil.ReadAll(result.Body) + assert.NoError(suite.T(), err) + + fmt.Println(string(b)) + + statusReply := &model.Status{} + err = json.Unmarshal(b, statusReply) + assert.NoError(suite.T(), err) + + assert.Equal(suite.T(), "

#test alright, should be able to post #links with fragments in them now, let's see........

docs.gotosocial.org/en/latest/user_guide/posts/#links
#gotosocial

(tobi remember to pull the docker image challenge)

", statusReply.Content) +} + func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { t := suite.testTokens["local_account_1"] @@ -154,7 +200,7 @@ func (suite *StatusCreateTestSuite) TestPostNewStatusWithEmoji() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "", statusReply.SpoilerText) - assert.Equal(suite.T(), "here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow: \n here's an emoji that isn't in the db: :test_emoji: ", statusReply.Content) + assert.Equal(suite.T(), "

here is a rainbow emoji a few times! :rainbow: :rainbow: :rainbow:
here's an emoji that isn't in the db: :test_emoji:

", statusReply.Content) assert.Len(suite.T(), statusReply.Emojis, 1) mastoEmoji := statusReply.Emojis[0] @@ -228,7 +274,7 @@ func (suite *StatusCreateTestSuite) TestReplyToLocalStatus() { assert.NoError(suite.T(), err) assert.Equal(suite.T(), "", statusReply.SpoilerText) - assert.Equal(suite.T(), fmt.Sprintf("hello @%s this reply should work!", testrig.NewTestAccounts()["local_account_2"].Username), statusReply.Content) + assert.Equal(suite.T(), fmt.Sprintf("

hello @%s this reply should work!

", testrig.NewTestAccounts()["local_account_2"].Username, testrig.NewTestAccounts()["local_account_2"].Username), statusReply.Content) assert.False(suite.T(), statusReply.Sensitive) assert.Equal(suite.T(), model.VisibilityPublic, statusReply.Visibility) assert.Equal(suite.T(), testrig.NewTestStatuses()["local_account_2_status_1"].ID, statusReply.InReplyToID) @@ -241,6 +287,8 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { t := suite.testTokens["local_account_1"] oauthToken := oauth.TokenToOauthToken(t) + attachment := suite.testAttachments["local_account_1_unattached_1"] + // setup recorder := httptest.NewRecorder() ctx, _ := gin.CreateTestContext(recorder) @@ -251,7 +299,7 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { ctx.Request = httptest.NewRequest(http.MethodPost, fmt.Sprintf("http://localhost:8080/%s", status.BasePath), nil) // the endpoint we're hitting ctx.Request.Form = url.Values{ "status": {"here's an image attachment"}, - "media_ids": {"7a3b9f77-ab30-461e-bdd8-e64bd1db3008"}, + "media_ids": {attachment.ID}, } suite.statusModule.StatusCreatePOSTHandler(ctx) @@ -263,23 +311,21 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { b, err := ioutil.ReadAll(result.Body) assert.NoError(suite.T(), err) - fmt.Println(string(b)) - - statusReply := &model.Status{} - err = json.Unmarshal(b, statusReply) + statusResponse := &model.Status{} + err = json.Unmarshal(b, statusResponse) assert.NoError(suite.T(), err) - assert.Equal(suite.T(), "", statusReply.SpoilerText) - assert.Equal(suite.T(), "here's an image attachment", statusReply.Content) - assert.False(suite.T(), statusReply.Sensitive) - assert.Equal(suite.T(), model.VisibilityPublic, statusReply.Visibility) + assert.Equal(suite.T(), "", statusResponse.SpoilerText) + assert.Equal(suite.T(), "

here's an image attachment

", statusResponse.Content) + assert.False(suite.T(), statusResponse.Sensitive) + assert.Equal(suite.T(), model.VisibilityPublic, statusResponse.Visibility) // there should be one media attachment - assert.Len(suite.T(), statusReply.MediaAttachments, 1) + assert.Len(suite.T(), statusResponse.MediaAttachments, 1) // get the updated media attachment from the database gtsAttachment := >smodel.MediaAttachment{} - err = suite.db.GetByID(statusReply.MediaAttachments[0].ID, gtsAttachment) + err = suite.db.GetByID(statusResponse.MediaAttachments[0].ID, gtsAttachment) assert.NoError(suite.T(), err) // convert it to a masto attachment @@ -287,10 +333,10 @@ func (suite *StatusCreateTestSuite) TestAttachNewMediaSuccess() { assert.NoError(suite.T(), err) // compare it with what we have now - assert.EqualValues(suite.T(), statusReply.MediaAttachments[0], gtsAttachmentAsMasto) + assert.EqualValues(suite.T(), statusResponse.MediaAttachments[0], gtsAttachmentAsMasto) // the status id of the attachment should now be set to the id of the status we just created - assert.Equal(suite.T(), statusReply.ID, gtsAttachment.StatusID) + assert.Equal(suite.T(), statusResponse.ID, gtsAttachment.StatusID) } func TestStatusCreateTestSuite(t *testing.T) { diff --git a/internal/text/common.go b/internal/text/common.go index 0165af630..98ec892a7 100644 --- a/internal/text/common.go +++ b/internal/text/common.go @@ -21,6 +21,9 @@ package text import ( "fmt" "strings" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/util" ) // preformat contains some common logic for making a string ready for formatting, which should be used for all user-input text. @@ -35,7 +38,7 @@ func preformat(in string) string { func postformat(in string) string { // do some postformatting of the text // 1. sanitize html to remove any dodgy scripts or other disallowed elements - s := SanitizeHTML(in) + s := SanitizeOutgoing(in) // 2. wrap the whole thing in a paragraph s = fmt.Sprintf(`

%s

`, s) // 3. remove any cheeky newlines @@ -44,3 +47,29 @@ func postformat(in string) string { s = strings.TrimSpace(s) return s } + +func (f *formatter) ReplaceTags(in string, tags []*gtsmodel.Tag) string { + return util.HashtagFinderRegex.ReplaceAllStringFunc(in, func(match string) string { + for _, tag := range tags { + if strings.TrimSpace(match) == fmt.Sprintf("#%s", tag.Name) { + tagContent := fmt.Sprintf(``, tag.URL, tag.Name) + if strings.HasPrefix(match, " ") { + tagContent = " " + tagContent + } + return tagContent + } + } + return in + }) +} + +func (f *formatter) ReplaceMentions(in string, mentions []*gtsmodel.Mention) string { + for _, menchie := range mentions { + targetAccount := >smodel.Account{} + if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil { + mentionContent := fmt.Sprintf(`@%s`, targetAccount.URL, targetAccount.Username) + in = strings.ReplaceAll(in, menchie.NameString, mentionContent) + } + } + return in +} diff --git a/internal/text/formatter.go b/internal/text/formatter.go index f8cca6675..39aaae559 100644 --- a/internal/text/formatter.go +++ b/internal/text/formatter.go @@ -31,6 +31,13 @@ type Formatter interface { FromMarkdown(md string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string // FromPlain parses an HTML text from a plaintext. FromPlain(plain string, mentions []*gtsmodel.Mention, tags []*gtsmodel.Tag) string + + // ReplaceTags takes a piece of text and a slice of tags, and returns the same text with the tags nicely formatted as hrefs. + ReplaceTags(in string, tags []*gtsmodel.Tag) string + // ReplaceMentions takes a piece of text and a slice of mentions, and returns the same text with the mentions nicely formatted as hrefs. + ReplaceMentions(in string, mentions []*gtsmodel.Mention) string + // ReplaceLinks takes a piece of text, finds all recognizable links in that text, and replaces them with hrefs. + ReplaceLinks(in string) string } type formatter struct { diff --git a/internal/text/formatter_test.go b/internal/text/formatter_test.go new file mode 100644 index 000000000..2c9c18546 --- /dev/null +++ b/internal/text/formatter_test.go @@ -0,0 +1,51 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text_test + +import ( + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/oauth" + "github.com/superseriousbusiness/gotosocial/internal/text" +) + +// nolint +type TextStandardTestSuite struct { + // standard suite interfaces + suite.Suite + config *config.Config + db db.DB + log *logrus.Logger + + // standard suite models + testTokens map[string]*oauth.Token + testClients map[string]*oauth.Client + testApplications map[string]*gtsmodel.Application + testUsers map[string]*gtsmodel.User + testAccounts map[string]*gtsmodel.Account + testAttachments map[string]*gtsmodel.MediaAttachment + testStatuses map[string]*gtsmodel.Status + testTags map[string]*gtsmodel.Tag + + // module being tested + formatter text.Formatter +} diff --git a/internal/text/link.go b/internal/text/link.go index 440571a83..d42cc3b68 100644 --- a/internal/text/link.go +++ b/internal/text/link.go @@ -82,7 +82,7 @@ func contains(urls []*url.URL, url *url.URL) bool { // Note: because Go doesn't allow negative lookbehinds in regex, it's possible that an already-formatted // href will end up double-formatted, if the text you pass here contains one or more hrefs already. // To avoid this, you should sanitize any HTML out of text before you pass it into this function. -func ReplaceLinks(in string) string { +func (f *formatter) ReplaceLinks(in string) string { rxStrict, err := xurls.StrictMatchingScheme(schemes) if err != nil { panic(err) diff --git a/internal/text/link_test.go b/internal/text/link_test.go index 636f26f7f..15e27f870 100644 --- a/internal/text/link_test.go +++ b/internal/text/link_test.go @@ -24,6 +24,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/suite" "github.com/superseriousbusiness/gotosocial/internal/text" + "github.com/superseriousbusiness/gotosocial/testrig" ) const text1 = ` @@ -64,11 +65,40 @@ what happens when we already have a link within an href? https://example.org ` -type TextTestSuite struct { - suite.Suite +type LinkTestSuite struct { + TextStandardTestSuite } -func (suite *TextTestSuite) TestParseURLsFromText1() { +func (suite *LinkTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() + suite.testTags = testrig.NewTestTags() +} + +func (suite *LinkTestSuite) SetupTest() { + suite.config = testrig.NewTestConfig() + suite.db = testrig.NewTestDB() + suite.log = testrig.NewTestLog() + suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log) + + testrig.StandardDBSetup(suite.db) +} + +func (suite *LinkTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) +} + +func (suite *LinkTestSuite) TestParseSimple() { + f := suite.formatter.FromPlain(simple, nil, nil) + assert.Equal(suite.T(), simpleExpected, f) +} + +func (suite *LinkTestSuite) TestParseURLsFromText1() { urls, err := text.FindLinks(text1) assert.NoError(suite.T(), err) @@ -79,7 +109,7 @@ func (suite *TextTestSuite) TestParseURLsFromText1() { assert.Equal(suite.T(), "https://example.orghttps://google.com", urls[3].String()) } -func (suite *TextTestSuite) TestParseURLsFromText2() { +func (suite *LinkTestSuite) TestParseURLsFromText2() { urls, err := text.FindLinks(text2) assert.NoError(suite.T(), err) @@ -87,7 +117,7 @@ func (suite *TextTestSuite) TestParseURLsFromText2() { assert.Len(suite.T(), urls, 1) } -func (suite *TextTestSuite) TestParseURLsFromText3() { +func (suite *LinkTestSuite) TestParseURLsFromText3() { urls, err := text.FindLinks(text3) assert.NoError(suite.T(), err) @@ -95,8 +125,8 @@ func (suite *TextTestSuite) TestParseURLsFromText3() { assert.Len(suite.T(), urls, 0) } -func (suite *TextTestSuite) TestReplaceLinksFromText1() { - replaced := text.ReplaceLinks(text1) +func (suite *LinkTestSuite) TestReplaceLinksFromText1() { + replaced := suite.formatter.ReplaceLinks(text1) assert.Equal(suite.T(), ` This is a text with some links in it. Here's link number one: example.org/link/to/something#fragment @@ -110,8 +140,8 @@ really.cool.website <-- this one shouldn't be parsed as a link because it doesn' `, replaced) } -func (suite *TextTestSuite) TestReplaceLinksFromText2() { - replaced := text.ReplaceLinks(text2) +func (suite *LinkTestSuite) TestReplaceLinksFromText2() { + replaced := suite.formatter.ReplaceLinks(text2) assert.Equal(suite.T(), ` this is one link: example.org @@ -121,16 +151,16 @@ these should be deduplicated `, replaced) } -func (suite *TextTestSuite) TestReplaceLinksFromText3() { +func (suite *LinkTestSuite) TestReplaceLinksFromText3() { // we know mailto links won't be replaced with hrefs -- we only accept https and http - replaced := text.ReplaceLinks(text3) + replaced := suite.formatter.ReplaceLinks(text3) assert.Equal(suite.T(), ` here's a mailto link: mailto:whatever@test.org `, replaced) } -func (suite *TextTestSuite) TestReplaceLinksFromText4() { - replaced := text.ReplaceLinks(text4) +func (suite *LinkTestSuite) TestReplaceLinksFromText4() { + replaced := suite.formatter.ReplaceLinks(text4) assert.Equal(suite.T(), ` two similar links: @@ -140,9 +170,9 @@ two similar links: `, replaced) } -func (suite *TextTestSuite) TestReplaceLinksFromText5() { +func (suite *LinkTestSuite) TestReplaceLinksFromText5() { // we know this one doesn't work properly, which is why html should always be sanitized before being passed into the ReplaceLinks function - replaced := text.ReplaceLinks(text5) + replaced := suite.formatter.ReplaceLinks(text5) assert.Equal(suite.T(), ` what happens when we already have a link within an href? @@ -150,6 +180,6 @@ what happens when we already have a link within an href? `, replaced) } -func TestTextTestSuite(t *testing.T) { - suite.Run(t, new(TextTestSuite)) +func TestLinkTestSuite(t *testing.T) { + suite.Run(t, new(LinkTestSuite)) } diff --git a/internal/text/markdown.go b/internal/text/markdown.go index d1309f389..f9d12209a 100644 --- a/internal/text/markdown.go +++ b/internal/text/markdown.go @@ -19,9 +19,6 @@ package text import ( - "fmt" - "strings" - "github.com/russross/blackfriday/v2" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) @@ -39,20 +36,11 @@ func (f *formatter) FromMarkdown(md string, mentions []*gtsmodel.Mention, tags [ // do the markdown parsing *first* content = string(blackfriday.Run([]byte(content), blackfriday.WithExtensions(bfExtensions))) - // format mentions nicely - for _, menchie := range mentions { - targetAccount := >smodel.Account{} - if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil { - mentionContent := fmt.Sprintf(`@%s`, targetAccount.URL, targetAccount.Username) - content = strings.ReplaceAll(content, menchie.NameString, mentionContent) - } - } - // format tags nicely - for _, tag := range tags { - tagContent := fmt.Sprintf(``, tag.URL, tag.Name) - content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent) - } + content = f.ReplaceTags(content, tags) + + // format mentions nicely + content = f.ReplaceMentions(content, mentions) return postformat(content) } diff --git a/internal/text/plain.go b/internal/text/plain.go index 4f6659484..40fb6412f 100644 --- a/internal/text/plain.go +++ b/internal/text/plain.go @@ -19,7 +19,6 @@ package text import ( - "fmt" "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" @@ -29,22 +28,13 @@ func (f *formatter) FromPlain(plain string, mentions []*gtsmodel.Mention, tags [ content := preformat(plain) // format links nicely - content = ReplaceLinks(content) - - // format mentions nicely - for _, menchie := range mentions { - targetAccount := >smodel.Account{} - if err := f.db.GetByID(menchie.TargetAccountID, targetAccount); err == nil { - mentionContent := fmt.Sprintf(`@%s`, targetAccount.URL, targetAccount.Username) - content = strings.ReplaceAll(content, menchie.NameString, mentionContent) - } - } + content = f.ReplaceLinks(content) // format tags nicely - for _, tag := range tags { - tagContent := fmt.Sprintf(``, tag.URL, tag.Name) - content = strings.ReplaceAll(content, fmt.Sprintf("#%s", tag.Name), tagContent) - } + content = f.ReplaceTags(content, tags) + + // format mentions nicely + content = f.ReplaceMentions(content, mentions) // replace newlines with breaks content = strings.ReplaceAll(content, "\n", "
") diff --git a/internal/text/plain_test.go b/internal/text/plain_test.go new file mode 100644 index 000000000..1e0d1471a --- /dev/null +++ b/internal/text/plain_test.go @@ -0,0 +1,84 @@ +/* + GoToSocial + Copyright (C) 2021 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package text_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/text" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +const ( + simple = "this is a plain and simple status" + simpleExpected = "

this is a plain and simple status

" + + withTag = "this is a simple status that uses hashtag #welcome!" + withTagExpected = "

this is a simple status that uses hashtag #welcome!

" +) + +type PlainTestSuite struct { + TextStandardTestSuite +} + +func (suite *PlainTestSuite) SetupSuite() { + suite.testTokens = testrig.NewTestTokens() + suite.testClients = testrig.NewTestClients() + suite.testApplications = testrig.NewTestApplications() + suite.testUsers = testrig.NewTestUsers() + suite.testAccounts = testrig.NewTestAccounts() + suite.testAttachments = testrig.NewTestAttachments() + suite.testStatuses = testrig.NewTestStatuses() + suite.testTags = testrig.NewTestTags() +} + +func (suite *PlainTestSuite) SetupTest() { + suite.config = testrig.NewTestConfig() + suite.db = testrig.NewTestDB() + suite.log = testrig.NewTestLog() + suite.formatter = text.NewFormatter(suite.config, suite.db, suite.log) + + testrig.StandardDBSetup(suite.db) +} + +func (suite *PlainTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.db) +} + +func (suite *PlainTestSuite) TestParseSimple() { + f := suite.formatter.FromPlain(simple, nil, nil) + assert.Equal(suite.T(), simpleExpected, f) +} + +func (suite *PlainTestSuite) TestParseWithTag() { + + foundTags := []*gtsmodel.Tag{ + suite.testTags["welcome"], + } + + f := suite.formatter.FromPlain(withTag, nil, foundTags) + assert.Equal(suite.T(), withTagExpected, f) +} + +func TestPlainTestSuite(t *testing.T) { + suite.Run(t, new(PlainTestSuite)) +} diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go index aac9d8aab..365875d46 100644 --- a/internal/text/sanitize.go +++ b/internal/text/sanitize.go @@ -30,7 +30,13 @@ import ( var regular *bluemonday.Policy = bluemonday.UGCPolicy(). RequireNoReferrerOnLinks(true). RequireNoFollowOnLinks(true). - RequireCrossOriginAnonymous(true) + RequireCrossOriginAnonymous(true). + AddTargetBlankToFullyQualifiedLinks(true) + +// outgoing policy should be used on statuses we've already parsed and added our own elements etc to. It is less strict than regular. +var outgoing *bluemonday.Policy = regular. + AllowAttrs("class", "href", "rel").OnElements("a"). + AllowAttrs("class").OnElements("span") // '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. // An example usage scenario would be blog post titles where HTML tags are not expected at all @@ -48,3 +54,9 @@ func SanitizeHTML(in string) string { func RemoveHTML(in string) string { return strict.Sanitize(in) } + +// SanitizeOutgoing cleans up HTML in the given string, allowing through only safe elements and elements that were added during the parsing process. +// This should be used on text that we've already converted into HTML, just to catch any weirdness. +func SanitizeOutgoing(in string) string { + return outgoing.Sanitize(in) +} diff --git a/internal/util/regexes.go b/internal/util/regexes.go index 1ca34708f..c03fd878c 100644 --- a/internal/util/regexes.go +++ b/internal/util/regexes.go @@ -30,25 +30,26 @@ const ( ) var ( - mentionNameRegexString = `^@([a-zA-Z0-9_]+)(?:@([a-zA-Z0-9_\-\.]+)?)$` + mentionNameRegexString = `^@(\w+)(?:@([a-zA-Z0-9_\-\.]+)?)$` // mention name regex captures the username and domain part from a mention string // such as @whatever_user@example.org, returning whatever_user and example.org (without the @ symbols) mentionNameRegex = regexp.MustCompile(mentionNameRegexString) // mention regex can be played around with here: https://regex101.com/r/qwM9D3/1 - mentionFinderRegexString = `(?: |^|\W)(@[a-zA-Z0-9_]+(?:@[a-zA-Z0-9_\-\.]+)?)(?:[^a-zA-Z0-9]|\W|$)?` + mentionFinderRegexString = `(?:\B)(@\w+(?:@[a-zA-Z0-9_\-\.]+)?)(?:\B)?` mentionFinderRegex = regexp.MustCompile(mentionFinderRegexString) - // hashtag regex can be played with here: https://regex101.com/r/Vhy8pg/1 - hashtagFinderRegexString = fmt.Sprintf(`(?:\b)?#(\w{1,%d})(?:\b)`, maximumHashtagLength) - hashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString) + // hashtag regex can be played with here: https://regex101.com/r/bPxeca/1 + hashtagFinderRegexString = fmt.Sprintf(`(?:^|\n|\s)(#[a-zA-Z0-9]{1,%d})(?:\b)`, maximumHashtagLength) + // HashtagFinderRegex finds possible hashtags in a string. + // It returns just the string part of the hashtag, not the # symbol. + HashtagFinderRegex = regexp.MustCompile(hashtagFinderRegexString) - // emoji shortcode regex can be played with here: https://regex101.com/r/zMDRaG/1 - emojiShortcodeRegexString = fmt.Sprintf(`[a-z0-9_]{2,%d}`, maximumEmojiShortcodeLength) + emojiShortcodeRegexString = fmt.Sprintf(`\w{2,%d}`, maximumEmojiShortcodeLength) emojiShortcodeValidationRegex = regexp.MustCompile(fmt.Sprintf("^%s$", emojiShortcodeRegexString)) // emoji regex can be played with here: https://regex101.com/r/478XGM/1 - emojiFinderRegexString = fmt.Sprintf(`(?: |^|\W)?:(%s):(?:\b|\r)?`, emojiShortcodeRegexString) + emojiFinderRegexString = fmt.Sprintf(`(?:\B)?:(%s):(?:\B)?`, emojiShortcodeRegexString) emojiFinderRegex = regexp.MustCompile(emojiFinderRegexString) // usernameRegexString defines an acceptable username on this instance diff --git a/internal/util/statustools.go b/internal/util/statustools.go index b51f2c80c..93294da68 100644 --- a/internal/util/statustools.go +++ b/internal/util/statustools.go @@ -29,7 +29,6 @@ import ( // // It will look for fully-qualified account names in the form "@user@example.org". // or the form "@username" for local users. -// The case of the returned mentions will be lowered, for consistency. func DeriveMentionsFromStatus(status string) []string { mentionedAccounts := []string{} for _, m := range mentionFinderRegex.FindAllStringSubmatch(status, -1) { @@ -44,16 +43,15 @@ func DeriveMentionsFromStatus(status string) []string { // tags will be lowered, for consistency. func DeriveHashtagsFromStatus(status string) []string { tags := []string{} - for _, m := range hashtagFinderRegex.FindAllStringSubmatch(status, -1) { - tags = append(tags, m[1]) + for _, m := range HashtagFinderRegex.FindAllStringSubmatch(status, -1) { + tags = append(tags, strings.TrimPrefix(m[1], "#")) } - return unique(tags) + return uniqueLower(tags) } // DeriveEmojisFromStatus takes a plaintext (ie., not html-formatted) status, // and applies a regex to it to return a deduplicated list of emojis -// used in that status, without the surround ::. The case of the returned -// emojis will be lowered, for consistency. +// used in that status, without the surround ::. func DeriveEmojisFromStatus(status string) []string { emojis := []string{} for _, m := range emojiFinderRegex.FindAllStringSubmatch(status, -1) { @@ -94,3 +92,17 @@ func unique(s []string) []string { } return list } + +// uniqueLower returns a deduplicated version of a given string slice, with all entries converted to lowercase +func uniqueLower(s []string) []string { + keys := make(map[string]bool) + list := []string{} + for _, entry := range s { + eLower := strings.ToLower(entry) + if _, value := keys[eLower]; !value { + keys[eLower] = true + list = append(list, eLower) + } + } + return list +} diff --git a/internal/util/statustools_test.go b/internal/util/statustools_test.go index 2a12c7690..5bdce2d5a 100644 --- a/internal/util/statustools_test.go +++ b/internal/util/statustools_test.go @@ -37,17 +37,22 @@ func (suite *StatusTestSuite) TestDeriveMentionsOK() { @someone_else@testing.best-horse.com can you confirm? @hello@test.lgbt - @thisisalocaluser ! @NORWILL@THIS.one!! + @thisisalocaluser! + + here is a duplicate mention: @hello@test.lgbt @hello@test.lgbt + + @account1@whatever.com @account2@whatever.com - here is a duplicate mention: @hello@test.lgbt ` menchies := util.DeriveMentionsFromStatus(statusText) - assert.Len(suite.T(), menchies, 4) + assert.Len(suite.T(), menchies, 6) assert.Equal(suite.T(), "@dumpsterqueer@example.org", menchies[0]) assert.Equal(suite.T(), "@someone_else@testing.best-horse.com", menchies[1]) assert.Equal(suite.T(), "@hello@test.lgbt", menchies[2]) assert.Equal(suite.T(), "@thisisalocaluser", menchies[3]) + assert.Equal(suite.T(), "@account1@whatever.com", menchies[4]) + assert.Equal(suite.T(), "@account2@whatever.com", menchies[5]) } func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { @@ -57,12 +62,14 @@ func (suite *StatusTestSuite) TestDeriveMentionsEmpty() { } func (suite *StatusTestSuite) TestDeriveHashtagsOK() { - statusText := `#testing123 #also testing + statusText := `weeeeeeee #testing123 #also testing # testing this one shouldn't work #thisshouldwork + here's a link with a fragment: https://example.org/whatever#ahhh + #ThisShouldAlsoWork #not_this_though #111111 thisalsoshouldn'twork#### ##` diff --git a/internal/util/validation_test.go b/internal/util/validation_test.go index 73f5cb977..639a89bbd 100644 --- a/internal/util/validation_test.go +++ b/internal/util/validation_test.go @@ -102,32 +102,32 @@ func (suite *ValidationTestSuite) TestValidateUsername() { err = util.ValidateUsername(tooLong) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("username should be no more than 64 chars but '%s' was 66", tooLong), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", tooLong), err) } err = util.ValidateUsername(withSpaces) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", withSpaces), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", withSpaces), err) } err = util.ValidateUsername(weirdChars) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", weirdChars), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", weirdChars), err) } err = util.ValidateUsername(leadingSpace) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", leadingSpace), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", leadingSpace), err) } err = util.ValidateUsername(trailingSpace) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", trailingSpace), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", trailingSpace), err) } err = util.ValidateUsername(newlines) if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores", newlines), err) + assert.Equal(suite.T(), fmt.Errorf("given username %s was invalid: must contain only lowercase letters, numbers, and underscores, max 64 characters", newlines), err) } err = util.ValidateUsername(goodUsername) @@ -141,7 +141,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() { notAnEmailAddress := "this-is-no-email-address!" almostAnEmailAddress := "@thisisalmostan@email.address" aWebsite := "https://thisisawebsite.com" - tooLong := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaahhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhh@gmail.com" emailAddress := "thisis.actually@anemail.address" var err error @@ -165,11 +164,6 @@ func (suite *ValidationTestSuite) TestValidateEmail() { assert.Equal(suite.T(), errors.New("mail: missing '@' or angle-addr"), err) } - err = util.ValidateEmail(tooLong) - if assert.Error(suite.T(), err) { - assert.Equal(suite.T(), fmt.Errorf("email address should be no more than 256 chars but '%s' was 286", tooLong), err) - } - err = util.ValidateEmail(emailAddress) if assert.NoError(suite.T(), err) { assert.Equal(suite.T(), nil, err) diff --git a/testrig/testmodels.go b/testrig/testmodels.go index 0b63e0ed2..1934170d2 100644 --- a/testrig/testmodels.go +++ b/testrig/testmodels.go @@ -1041,6 +1041,7 @@ func NewTestTags() map[string]*gtsmodel.Tag { return map[string]*gtsmodel.Tag{ "welcome": { ID: "01F8MHA1A2NF9MJ3WCCQ3K8BSZ", + URL: "http://localhost:8080/tags/welcome", Name: "welcome", FirstSeenFromAccountID: "", CreatedAt: time.Now().Add(-71 * time.Hour),