mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2025-06-05 21:59:39 +02:00
[bug] respect X-Robots-Tag
and robots.txt
on api/v1/instance and nodeinfo (#3756)
* feat: check X-Robots-Tag when accessing /api/v1/instance or /nodeinfo endpoints respect X-Robots-Tag * chore: go fmt ./... * Check robots.txt as well, add tests --------- Co-authored-by: tobi <tobi.smethurst@protonmail.com>
This commit is contained in:
91
internal/transport/derefrobots.go
Normal file
91
internal/transport/derefrobots.go
Normal file
@@ -0,0 +1,91 @@
|
||||
// GoToSocial
|
||||
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
||||
// SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package transport
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"codeberg.org/gruf/go-bytesize"
|
||||
"codeberg.org/gruf/go-iotools"
|
||||
apiutil "github.com/superseriousbusiness/gotosocial/internal/api/util"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
|
||||
"github.com/temoto/robotstxt"
|
||||
)
|
||||
|
||||
func (t *transport) DereferenceRobots(ctx context.Context, protocol string, host string) (*robotstxt.RobotsData, error) {
|
||||
robotsIRI := &url.URL{
|
||||
Scheme: protocol,
|
||||
Host: host,
|
||||
Path: "robots.txt",
|
||||
}
|
||||
|
||||
// Build IRI just once
|
||||
iriStr := robotsIRI.String()
|
||||
|
||||
// Prepare new HTTP request to endpoint
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", iriStr, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We want text/plain utf-8 encoding.
|
||||
//
|
||||
// https://www.rfc-editor.org/rfc/rfc9309.html#name-access-method
|
||||
req.Header.Add("Accept", apiutil.TextPlain)
|
||||
req.Header.Add("Accept-Charset", apiutil.UTF8)
|
||||
|
||||
// Perform the HTTP request
|
||||
rsp, err := t.GET(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ensure a non-error status response.
|
||||
if rsp.StatusCode != http.StatusOK {
|
||||
err := gtserror.NewFromResponse(rsp)
|
||||
_ = rsp.Body.Close() // close early.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Ensure that the incoming request content-type is expected.
|
||||
if ct := rsp.Header.Get("Content-Type"); !apiutil.TextPlainContentType(ct) {
|
||||
err := gtserror.Newf("non text/plain response: %s", ct)
|
||||
_ = rsp.Body.Close() // close early.
|
||||
return nil, gtserror.SetMalformed(err)
|
||||
}
|
||||
|
||||
// Limit the robots.txt size to 500KiB
|
||||
//
|
||||
// https://www.rfc-editor.org/rfc/rfc9309.html#name-limits
|
||||
const maxsz = int64(500 * bytesize.KiB)
|
||||
|
||||
// Check body claims to be within size limit.
|
||||
if rsp.ContentLength > maxsz {
|
||||
_ = rsp.Body.Close() // close early.
|
||||
sz := bytesize.Size(maxsz) //nolint:gosec
|
||||
return nil, gtserror.Newf("robots.txt body exceeds max size %s", sz)
|
||||
}
|
||||
|
||||
// Update response body with maximum size.
|
||||
rsp.Body, _, _ = iotools.UpdateReadCloserLimit(rsp.Body, maxsz)
|
||||
defer rsp.Body.Close()
|
||||
|
||||
return robotstxt.FromResponse(rsp)
|
||||
}
|
Reference in New Issue
Block a user