[bugfix] Fix existing bio text showing as HTML (#531)

* fix existing bio text showing as HTML

- updated replaced mentions to include instance
- strips HTML from account source note in Verify handler
- update text formatter to use buffers for string writes

Signed-off-by: kim <grufwub@gmail.com>

* go away linter

Signed-off-by: kim <grufwub@gmail.com>

* change buf reset location, change html mention tags

Signed-off-by: kim <grufwub@gmail.com>

* reduce FindLinks code complexity

Signed-off-by: kim <grufwub@gmail.com>

* fix HTML to text conversion

Signed-off-by: kim <grufwub@gmail.com>

* Update internal/regexes/regexes.go

Co-authored-by: Mina Galić <mina.galic@puppet.com>

* use improved html2text lib with more options

Signed-off-by: kim <grufwub@gmail.com>

* fix to produce actual plaintext from html

Signed-off-by: kim <grufwub@gmail.com>

* fix span tags instead written as space

Signed-off-by: kim <grufwub@gmail.com>

* performance improvements to regex replacements, fix link replace logic for un-html-ing in the future

Signed-off-by: kim <grufwub@gmail.com>

* fix tag/mention replacements to use input string, fix link replace to not include scheme

Signed-off-by: kim <grufwub@gmail.com>

* use matched input string for link replace href text

Signed-off-by: kim <grufwub@gmail.com>

* remove unused code (to appease linter :sobs:)

Signed-off-by: kim <grufwub@gmail.com>

* improve hashtagFinger regex to be more compliant

Signed-off-by: kim <grufwub@gmail.com>

* update breakReplacer to include both unix and windows line endings

Signed-off-by: kim <grufwub@gmail.com>

* add NoteRaw field to Account to store plaintext account bio, add migration for this, set for sensitive accounts

Signed-off-by: kim <grufwub@gmail.com>

* drop unnecessary code

Signed-off-by: kim <grufwub@gmail.com>

* update text package tests to fix logic changes

Signed-off-by: kim <grufwub@gmail.com>

* add raw note content testing to account update and account verify

Signed-off-by: kim <grufwub@gmail.com>

* remove unused modules

Signed-off-by: kim <grufwub@gmail.com>

* fix emoji regex

Signed-off-by: kim <grufwub@gmail.com>

* fix replacement of hashtags

Signed-off-by: kim <grufwub@gmail.com>

* update code comment

Signed-off-by: kim <grufwub@gmail.com>

Co-authored-by: Mina Galić <mina.galic@puppet.com>
This commit is contained in:
kim
2022-05-07 16:55:27 +01:00
committed by GitHub
parent 08eb271a4c
commit 26b74aefaf
16 changed files with 180 additions and 108 deletions

View File

@@ -75,9 +75,7 @@ func (suite *LinkTestSuite) TestParseSimple() {
}
func (suite *LinkTestSuite) TestParseURLsFromText1() {
urls, err := text.FindLinks(text1)
assert.NoError(suite.T(), err)
urls := text.FindLinks(text1)
assert.Equal(suite.T(), "https://example.org/link/to/something#fragment", urls[0].String())
assert.Equal(suite.T(), "http://test.example.org?q=bahhhhhhhhhhhh", urls[1].String())
@@ -86,16 +84,14 @@ func (suite *LinkTestSuite) TestParseURLsFromText1() {
}
func (suite *LinkTestSuite) TestParseURLsFromText2() {
urls, err := text.FindLinks(text2)
assert.NoError(suite.T(), err)
urls := text.FindLinks(text2)
// assert length 1 because the found links will be deduplicated
assert.Len(suite.T(), urls, 1)
}
func (suite *LinkTestSuite) TestParseURLsFromText3() {
urls, err := text.FindLinks(text3)
assert.NoError(suite.T(), err)
urls := text.FindLinks(text3)
// assert length 0 because `mailto:` isn't accepted
assert.Len(suite.T(), urls, 0)
@@ -112,7 +108,7 @@ Here's link number two: <a href="http://test.example.org?q=bahhhhhhhhhhhh" rel="
really.cool.website <-- this one shouldn't be parsed as a link because it doesn't contain the scheme
<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps//google.com</a> <-- this shouldn't work either, but it does?! OK
<a href="https://example.orghttps://google.com" rel="noopener">example.orghttps://google.com</a> <-- this shouldn't work either, but it does?! OK
`, replaced)
}