[bug] respect X-Robots-Tag and robots.txt on api/v1/instance and nodeinfo (#3756)

* feat: check X-Robots-Tag

When accessing the /api/v1/instance or /nodeinfo endpoints, respect the
X-Robots-Tag response header.

* chore: go fmt ./...

* Check robots.txt as well, add tests

---------

Co-authored-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
alemi.dev
2025-02-11 13:16:14 +01:00
committed by GitHub
parent 2c95fd4115
commit d0de3ad492
20 changed files with 1404 additions and 24 deletions

View File

@ -36,6 +36,8 @@ const (
TextHTML = `text/html`
TextCSS = `text/css`
TextCSV = `text/csv`
TextPlain = `text/plain`
UTF8 = `utf-8`
)
// JSONContentType returns whether is application/json(;charset=utf-8)? content-type.
@ -74,6 +76,14 @@ func XMLXRDContentType(ct string) bool {
p[0] == AppXMLXRD
}
// TextPlainContentType returns whether is text/plain(;charset=utf-8)? content-type.
func TextPlainContentType(ct string) bool {
p := splitContentType(ct)
p, ok := isUTF8ContentType(p)
return ok && len(p) == 1 &&
p[0] == TextPlain
}
// ASContentType returns whether is valid ActivityStreams content-types:
// - application/activity+json
// - application/ld+json;profile=https://w3.org/ns/activitystreams
@ -118,7 +128,7 @@ func NodeInfo2ContentType(ct string) bool {
// type parts list, removes it and returns whether is utf-8.
func isUTF8ContentType(p []string) ([]string, bool) {
const charset = "charset="
const charsetUTF8 = charset + "utf-8"
const charsetUTF8 = charset + UTF8
for i, part := range p {
// Only handle charset slice parts.