mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bug] respect X-Robots-Tag
and robots.txt
on api/v1/instance and nodeinfo (#3756)
* feat: check X-Robots-Tag when accessing /api/v1/instance or /nodeinfo endpoints respect X-Robots-Tag * chore: go fmt ./... * Check robots.txt as well, add tests --------- Co-authored-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
185
vendor/github.com/temoto/robotstxt/scanner.go
generated
vendored
Normal file
185
vendor/github.com/temoto/robotstxt/scanner.go
generated
vendored
Normal file
@ -0,0 +1,185 @@
|
||||
package robotstxt
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"go/token"
|
||||
"os"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type byteScanner struct {
|
||||
pos token.Position
|
||||
buf []byte
|
||||
ErrorCount int
|
||||
ch rune
|
||||
Quiet bool
|
||||
keyTokenFound bool
|
||||
lastChunk bool
|
||||
}
|
||||
|
||||
const tokEOL = "\n"
|
||||
|
||||
var WhitespaceChars = []rune{' ', '\t', '\v'}
|
||||
var tokBuffers = sync.Pool{New: func() interface{} { return bytes.NewBuffer(make([]byte, 32)) }}
|
||||
|
||||
func newByteScanner(srcname string, quiet bool) *byteScanner {
|
||||
return &byteScanner{
|
||||
Quiet: quiet,
|
||||
ch: -1,
|
||||
pos: token.Position{Filename: srcname},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *byteScanner) feed(input []byte, end bool) {
|
||||
s.buf = input
|
||||
s.pos.Offset = 0
|
||||
s.pos.Line = 1
|
||||
s.pos.Column = 1
|
||||
s.lastChunk = end
|
||||
|
||||
// Read first char into look-ahead buffer `s.ch`.
|
||||
if !s.nextChar() {
|
||||
return
|
||||
}
|
||||
|
||||
// Skip UTF-8 byte order mark
|
||||
if s.ch == 65279 {
|
||||
s.nextChar()
|
||||
s.pos.Column = 1
|
||||
}
|
||||
}
|
||||
|
||||
func (s *byteScanner) GetPosition() token.Position {
|
||||
return s.pos
|
||||
}
|
||||
|
||||
func (s *byteScanner) scan() string {
|
||||
// Note Offset > len, not >=, so we can scan last character.
|
||||
if s.lastChunk && s.pos.Offset > len(s.buf) {
|
||||
return ""
|
||||
}
|
||||
|
||||
s.skipSpace()
|
||||
|
||||
if s.ch == -1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// EOL
|
||||
if s.isEol() {
|
||||
s.keyTokenFound = false
|
||||
// skip subsequent newline chars
|
||||
for s.ch != -1 && s.isEol() {
|
||||
s.nextChar()
|
||||
}
|
||||
// emit newline as separate token
|
||||
return tokEOL
|
||||
}
|
||||
|
||||
// skip comments
|
||||
if s.ch == '#' {
|
||||
s.keyTokenFound = false
|
||||
s.skipUntilEol()
|
||||
if s.ch == -1 {
|
||||
return ""
|
||||
}
|
||||
// emit newline as separate token
|
||||
return tokEOL
|
||||
}
|
||||
|
||||
// else we found something
|
||||
tok := tokBuffers.Get().(*bytes.Buffer)
|
||||
defer tokBuffers.Put(tok)
|
||||
tok.Reset()
|
||||
tok.WriteRune(s.ch)
|
||||
s.nextChar()
|
||||
for s.ch != -1 && !s.isSpace() && !s.isEol() {
|
||||
// Do not consider ":" to be a token separator if a first key token
|
||||
// has already been found on this line (avoid cutting an absolute URL
|
||||
// after the "http:")
|
||||
if s.ch == ':' && !s.keyTokenFound {
|
||||
s.nextChar()
|
||||
s.keyTokenFound = true
|
||||
break
|
||||
}
|
||||
|
||||
tok.WriteRune(s.ch)
|
||||
s.nextChar()
|
||||
}
|
||||
return tok.String()
|
||||
}
|
||||
|
||||
func (s *byteScanner) scanAll() []string {
|
||||
results := make([]string, 0, 64) // random guess of average tokens length
|
||||
for {
|
||||
token := s.scan()
|
||||
if token != "" {
|
||||
results = append(results, token)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func (s *byteScanner) error(pos token.Position, msg string) {
|
||||
s.ErrorCount++
|
||||
if !s.Quiet {
|
||||
fmt.Fprintf(os.Stderr, "robotstxt from %s: %s\n", pos.String(), msg)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *byteScanner) isEol() bool {
|
||||
return s.ch == '\n' || s.ch == '\r'
|
||||
}
|
||||
|
||||
func (s *byteScanner) isSpace() bool {
|
||||
for _, r := range WhitespaceChars {
|
||||
if s.ch == r {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (s *byteScanner) skipSpace() {
|
||||
for s.ch != -1 && s.isSpace() {
|
||||
s.nextChar()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *byteScanner) skipUntilEol() {
|
||||
for s.ch != -1 && !s.isEol() {
|
||||
s.nextChar()
|
||||
}
|
||||
// skip subsequent newline chars
|
||||
for s.ch != -1 && s.isEol() {
|
||||
s.nextChar()
|
||||
}
|
||||
}
|
||||
|
||||
// Reads next Unicode char.
|
||||
func (s *byteScanner) nextChar() bool {
|
||||
if s.pos.Offset >= len(s.buf) {
|
||||
s.ch = -1
|
||||
return false
|
||||
}
|
||||
s.pos.Column++
|
||||
if s.ch == '\n' {
|
||||
s.pos.Line++
|
||||
s.pos.Column = 1
|
||||
}
|
||||
r, w := rune(s.buf[s.pos.Offset]), 1
|
||||
if r >= 0x80 {
|
||||
r, w = utf8.DecodeRune(s.buf[s.pos.Offset:])
|
||||
if r == utf8.RuneError && w == 1 {
|
||||
s.error(s.pos, "illegal UTF-8 encoding")
|
||||
}
|
||||
}
|
||||
s.pos.Column++
|
||||
s.pos.Offset += w
|
||||
s.ch = r
|
||||
return true
|
||||
}
|
Reference in New Issue
Block a user