mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bugfix] Fix indentation on multi-line alt text in web view (#4149)
This pull request closes https://codeberg.org/superseriousbusiness/gotosocial/issues/3403 by searching for alt or title attributes in HTML, looking for the shortest indent (if any) at the start of multiline content of said attributes, and removing that shortest indent from each line. This is a bit more fiddly than the "easy" way of doing it, but it has the advantage that it preserves user-added indents at the start of lines of alt text. Reviewed-on: https://codeberg.org/superseriousbusiness/gotosocial/pulls/4149 Co-authored-by: tobi <tobi.smethurst@protonmail.com> Co-committed-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
@@ -25,7 +25,9 @@ import (
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
apimodel "code.superseriousbusiness.org/gotosocial/internal/api/model"
|
||||
@@ -134,25 +136,25 @@ func LoadTemplates(engine *gin.Engine) error {
|
||||
}
|
||||
|
||||
var funcMap = template.FuncMap{
|
||||
"add": add,
|
||||
"acctInstance": acctInstance,
|
||||
"objectPosition": objectPosition,
|
||||
"demojify": demojify,
|
||||
"deref": deref,
|
||||
"emojify": emojify,
|
||||
"escape": escape,
|
||||
"increment": increment,
|
||||
"indent": indent,
|
||||
"indentAttr": indentAttr,
|
||||
"isNil": isNil,
|
||||
"outdentPre": outdentPre,
|
||||
"noescapeAttr": noescapeAttr,
|
||||
"noescape": noescape,
|
||||
"oddOrEven": oddOrEven,
|
||||
"subtract": subtract,
|
||||
"timestampPrecise": timestampPrecise,
|
||||
"timestampVague": timestampVague,
|
||||
"visibilityIcon": visibilityIcon,
|
||||
"add": add,
|
||||
"acctInstance": acctInstance,
|
||||
"objectPosition": objectPosition,
|
||||
"demojify": demojify,
|
||||
"deref": deref,
|
||||
"emojify": emojify,
|
||||
"escape": escape,
|
||||
"increment": increment,
|
||||
"indent": indent,
|
||||
"indentAttr": indentAttr,
|
||||
"isNil": isNil,
|
||||
"outdentPreformatted": outdentPreformatted,
|
||||
"noescapeAttr": noescapeAttr,
|
||||
"noescape": noescape,
|
||||
"oddOrEven": oddOrEven,
|
||||
"subtract": subtract,
|
||||
"timestampPrecise": timestampPrecise,
|
||||
"timestampVague": timestampVague,
|
||||
"visibilityIcon": visibilityIcon,
|
||||
}
|
||||
|
||||
func oddOrEven(n int) string {
|
||||
@@ -291,11 +293,31 @@ func subtract(n1 int, n2 int) int {
|
||||
}
|
||||
|
||||
var (
|
||||
indentRegex = regexp.MustCompile(`(?m)^`)
|
||||
// Find starts of lines to replace with indent.
|
||||
indentRegex = regexp.MustCompile(`(?m)^`)
|
||||
|
||||
// One indent level.
|
||||
indentStr = " "
|
||||
indentStrLen = len(indentStr)
|
||||
indents = strings.Repeat(indentStr, 12)
|
||||
indentPre = regexp.MustCompile(fmt.Sprintf(`(?Ums)^((?:%s)+)<pre>.*</pre>`, indentStr))
|
||||
|
||||
// Preformatted slice of indents.
|
||||
indents = strings.Repeat(indentStr, 12)
|
||||
|
||||
// Measure indent at the start of a line.
|
||||
indentDepthStr = fmt.Sprintf(`^((?:%s)+)`, indentStr)
|
||||
indentDepth = regexp.MustCompile(`(?m)` + indentDepthStr)
|
||||
|
||||
// Find <pre> tags and determine how indented they are.
|
||||
indentPre = regexp.MustCompile(fmt.Sprintf(`(?Ums)%s<pre>.*</pre>`, indentDepthStr))
|
||||
// Find content of alt or title attributes.
|
||||
indentAltOrTitle = regexp.MustCompile(`(?Ums)\b(?:alt|title)="(.*)"(?:\b|>|$)`)
|
||||
|
||||
// Map of lazily-compiled replaceIndent
|
||||
// regexes, keyed by the indent they
|
||||
// replace, to avoid recompilation.
|
||||
//
|
||||
// At *most* 12 entries long.
|
||||
replaceIndents = sync.Map{}
|
||||
)
|
||||
|
||||
// indent appropriately indents the given html
|
||||
@@ -318,32 +340,104 @@ func indentAttr(n int, html template.HTMLAttr) template.HTMLAttr {
|
||||
return noescapeAttr(out)
|
||||
}
|
||||
|
||||
// outdentPre outdents all `<pre></pre>` tags in the
|
||||
// given HTML so that they render correctly in code
|
||||
// blocks, even if they were indented before.
|
||||
func outdentPre(html template.HTML) template.HTML {
|
||||
// outdentPreformatted outdents all preformatted text in
|
||||
// the given HTML, ie., in `alt` and `title` attributes,
|
||||
// and between `<pre>` tags, so that it renders correctly,
|
||||
// even if it was indented before.
|
||||
func outdentPreformatted(html template.HTML) template.HTML {
|
||||
input := string(html)
|
||||
output := regexes.ReplaceAllStringFunc(indentPre, input,
|
||||
func(match string, buf *bytes.Buffer) string {
|
||||
// Reuse the regex to pull out submatches.
|
||||
matches := indentPre.FindAllStringSubmatch(match, -1)
|
||||
|
||||
// Ensure matches
|
||||
// expected length.
|
||||
if len(matches) != 1 {
|
||||
return match
|
||||
}
|
||||
|
||||
// Ensure inner matches
|
||||
// expected length.
|
||||
innerMatches := matches[0]
|
||||
if len(innerMatches) != 2 {
|
||||
return match
|
||||
}
|
||||
|
||||
var (
|
||||
indented = matches[0][0]
|
||||
indent = matches[0][1]
|
||||
indentedContent = innerMatches[0]
|
||||
indent = innerMatches[1]
|
||||
)
|
||||
|
||||
// Outdent everything in the inner match, add
|
||||
// a newline at the end to make it a bit neater.
|
||||
outdented := strings.ReplaceAll(indented, indent, "")
|
||||
// Outdent everything in the inner match.
|
||||
outdented := strings.ReplaceAll(indentedContent, indent, "")
|
||||
|
||||
// Replace original match with the outdented version.
|
||||
return strings.ReplaceAll(match, indented, outdented)
|
||||
return strings.ReplaceAll(match, indentedContent, outdented)
|
||||
},
|
||||
)
|
||||
|
||||
output = regexes.ReplaceAllStringFunc(indentAltOrTitle, output,
|
||||
func(match string, buf *bytes.Buffer) string {
|
||||
// Reuse the regex to pull out submatches.
|
||||
matches := indentAltOrTitle.FindAllStringSubmatch(match, -1)
|
||||
|
||||
// Ensure matches
|
||||
// expected length.
|
||||
if len(matches) != 1 {
|
||||
return match
|
||||
}
|
||||
|
||||
// Ensure inner matches
|
||||
// expected length.
|
||||
innerMatches := matches[0]
|
||||
if len(innerMatches) != 2 {
|
||||
return match
|
||||
}
|
||||
|
||||
// The content of the alt or title
|
||||
// attr inside quotation marks.
|
||||
indentedContent := innerMatches[1]
|
||||
|
||||
// Find all indents in this text.
|
||||
indents := indentDepth.FindAllString(indentedContent, -1)
|
||||
if len(indents) == 0 {
|
||||
// No indents in this text,
|
||||
// it's probably just something
|
||||
// inline like `alt="whatever"`.
|
||||
return match
|
||||
}
|
||||
|
||||
// Find the shortest indent as this
|
||||
// is undoubtedly the one we added.
|
||||
//
|
||||
// By targeting the shortest one we
|
||||
// avoid removing user-inserted
|
||||
// whitespace at the start of lines
|
||||
// of alt text (eg., in poetry etc).
|
||||
slices.Sort(indents)
|
||||
indent := indents[0]
|
||||
|
||||
// Load or create + store the
|
||||
// regex to replace this indent,
|
||||
// avoiding recompilation.
|
||||
var replaceIndent *regexp.Regexp
|
||||
if replaceIndentI, ok := replaceIndents.Load(indent); ok {
|
||||
// Got regex for this indent.
|
||||
replaceIndent = replaceIndentI.(*regexp.Regexp)
|
||||
} else {
|
||||
// No regex stored for
|
||||
// this indent yet, store it.
|
||||
replaceIndent = regexp.MustCompile(`(?m)^` + indent)
|
||||
replaceIndents.Store(indent, replaceIndent)
|
||||
}
|
||||
|
||||
// Remove all occurrences of the indent
|
||||
// at the start of a line in the match.
|
||||
return replaceIndent.ReplaceAllString(match, "")
|
||||
},
|
||||
)
|
||||
|
||||
return noescape(output)
|
||||
}
|
||||
|
||||
|
@@ -22,10 +22,19 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestOutdentPre(t *testing.T) {
|
||||
func TestOutdentPreformatted(t *testing.T) {
|
||||
const html = template.HTML(`
|
||||
<div class="text">
|
||||
<div class="content" lang="en">
|
||||
<div
|
||||
class="content"
|
||||
lang="en"
|
||||
title="DW from Arthur is labeled "crawlers".
|
||||
|
||||
She's reading a sign on a door that says: "robots.txt: don't crawl this website, it's not for you, please, thanks."
|
||||
|
||||
With her hands on her hips looking annoyed she says "That sign won't stop me because I can't read!""
|
||||
alt="pee pee poo poo"
|
||||
>
|
||||
<p>Here's a bunch of HTML, read it and weep, weep then!</p>
|
||||
<pre><code class="language-html"><section class="about-user">
|
||||
<div class="col-header">
|
||||
@@ -67,7 +76,15 @@ func TestOutdentPre(t *testing.T) {
|
||||
</div>
|
||||
</div>
|
||||
<div class="text">
|
||||
<div class="content" lang="en">
|
||||
<div
|
||||
class="content"
|
||||
lang="en"
|
||||
alt="DW from Arthur is labeled "crawlers".
|
||||
|
||||
She's reading a sign on a door that says: "robots.txt: don't crawl this website, it's not for you, please, thanks."
|
||||
|
||||
With her hands on her hips looking annoyed she says "That sign won't stop me because I can't read!""
|
||||
>
|
||||
<p>Here's a bunch of HTML, read it and weep, weep then!</p>
|
||||
<pre><code class="language-html"><section class="about-user">
|
||||
<div class="col-header">
|
||||
@@ -112,7 +129,16 @@ func TestOutdentPre(t *testing.T) {
|
||||
|
||||
const expected = template.HTML(`
|
||||
<div class="text">
|
||||
<div class="content" lang="en">
|
||||
<div
|
||||
class="content"
|
||||
lang="en"
|
||||
title="DW from Arthur is labeled "crawlers".
|
||||
|
||||
She's reading a sign on a door that says: "robots.txt: don't crawl this website, it's not for you, please, thanks."
|
||||
|
||||
With her hands on her hips looking annoyed she says "That sign won't stop me because I can't read!""
|
||||
alt="pee pee poo poo"
|
||||
>
|
||||
<p>Here's a bunch of HTML, read it and weep, weep then!</p>
|
||||
<pre><code class="language-html"><section class="about-user">
|
||||
<div class="col-header">
|
||||
@@ -154,7 +180,15 @@ func TestOutdentPre(t *testing.T) {
|
||||
</div>
|
||||
</div>
|
||||
<div class="text">
|
||||
<div class="content" lang="en">
|
||||
<div
|
||||
class="content"
|
||||
lang="en"
|
||||
alt="DW from Arthur is labeled "crawlers".
|
||||
|
||||
She's reading a sign on a door that says: "robots.txt: don't crawl this website, it's not for you, please, thanks."
|
||||
|
||||
With her hands on her hips looking annoyed she says "That sign won't stop me because I can't read!""
|
||||
>
|
||||
<p>Here's a bunch of HTML, read it and weep, weep then!</p>
|
||||
<pre><code class="language-html"><section class="about-user">
|
||||
<div class="col-header">
|
||||
@@ -197,7 +231,7 @@ func TestOutdentPre(t *testing.T) {
|
||||
</div>
|
||||
`)
|
||||
|
||||
out := outdentPre(html)
|
||||
out := outdentPreformatted(html)
|
||||
if out != expected {
|
||||
t.Fatalf("unexpected output:\n`%s`\n", out)
|
||||
}
|
||||
|
@@ -79,7 +79,7 @@ image/webp
|
||||
{{- include "page_header.tmpl" . | indent 3 }}
|
||||
</header>
|
||||
<div class="page-content">
|
||||
{{- include .pageContent . | indent 3 | outdentPre }}
|
||||
{{- include .pageContent . | indent 3 | outdentPreformatted }}
|
||||
</div>
|
||||
<footer class="page-footer">
|
||||
{{- include "page_footer.tmpl" . | indent 3 }}
|
||||
|
Reference in New Issue
Block a user